aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile.inc9
-rw-r--r--tools/genmacro/Makefile.inc14
-rw-r--r--tools/genmacro/genmacro.c134
-rw-r--r--tools/genperf/Makefile.inc42
-rw-r--r--tools/genperf/genperf.c540
-rw-r--r--tools/genperf/perfect.c1178
-rw-r--r--tools/genperf/perfect.h132
-rw-r--r--tools/genperf/standard.h35
-rw-r--r--tools/python-yasm/Makefile.inc79
-rw-r--r--tools/python-yasm/bytecode.pxi107
-rw-r--r--tools/python-yasm/errwarn.pxi73
-rw-r--r--tools/python-yasm/expr.pxi136
-rw-r--r--tools/python-yasm/floatnum.pxi49
-rw-r--r--tools/python-yasm/intnum.pxi170
-rwxr-xr-xtools/python-yasm/pyxelator/cparse.py819
-rwxr-xr-xtools/python-yasm/pyxelator/genpyx.py530
-rwxr-xr-xtools/python-yasm/pyxelator/ir.py1163
-rwxr-xr-xtools/python-yasm/pyxelator/lexer.py248
-rwxr-xr-xtools/python-yasm/pyxelator/node.py301
-rwxr-xr-xtools/python-yasm/pyxelator/parse_core.py106
-rwxr-xr-xtools/python-yasm/pyxelator/work_unit.py192
-rwxr-xr-xtools/python-yasm/pyxelator/wrap_yasm.py57
-rw-r--r--tools/python-yasm/setup.py88
-rw-r--r--tools/python-yasm/symrec.pxi285
-rw-r--r--tools/python-yasm/tests/Makefile.inc13
-rw-r--r--tools/python-yasm/tests/__init__.py69
-rwxr-xr-xtools/python-yasm/tests/python_test.sh19
-rw-r--r--tools/python-yasm/tests/test_bytecode.py3
-rw-r--r--tools/python-yasm/tests/test_expr.py18
-rw-r--r--tools/python-yasm/tests/test_intnum.py77
-rw-r--r--tools/python-yasm/tests/test_symrec.py80
-rw-r--r--tools/python-yasm/value.pxi56
-rw-r--r--tools/python-yasm/yasm.pyx137
-rw-r--r--tools/re2c/CHANGELOG22
-rw-r--r--tools/re2c/Makefile.inc93
-rw-r--r--tools/re2c/NO_WARRANTY2
-rw-r--r--tools/re2c/README153
-rw-r--r--tools/re2c/actions.c692
-rw-r--r--tools/re2c/basics.h14
-rw-r--r--tools/re2c/bootstrap/scanner.c748
-rw-r--r--tools/re2c/code.c969
-rw-r--r--tools/re2c/dfa.c253
-rw-r--r--tools/re2c/dfa.h173
-rw-r--r--tools/re2c/doc/loplas.ps.gzbin0 -> 69080 bytes
-rw-r--r--tools/re2c/doc/sample.bib48
-rw-r--r--tools/re2c/examples/basemmap.c26
-rw-r--r--tools/re2c/examples/c.re272
-rw-r--r--tools/re2c/examples/cmmap.re267
-rw-r--r--tools/re2c/examples/cnokw.re239
-rw-r--r--tools/re2c/examples/cunroll.re258
-rw-r--r--tools/re2c/examples/modula.re202
-rw-r--r--tools/re2c/examples/repeater.re42
-rw-r--r--tools/re2c/examples/rexx/README1
-rw-r--r--tools/re2c/examples/rexx/rexx.l319
-rw-r--r--tools/re2c/examples/rexx/scanio.c41
-rw-r--r--tools/re2c/examples/sample.re7
-rw-r--r--tools/re2c/examples/simple.re13
-rw-r--r--tools/re2c/globals.h26
-rw-r--r--tools/re2c/ins.h40
-rw-r--r--tools/re2c/main.c196
-rw-r--r--tools/re2c/mbo_getopt.c194
-rw-r--r--tools/re2c/mbo_getopt.h22
-rw-r--r--tools/re2c/parse.h29
-rw-r--r--tools/re2c/parser.c249
-rw-r--r--tools/re2c/parser.h33
-rw-r--r--tools/re2c/re.h191
-rw-r--r--tools/re2c/re2c.1536
-rw-r--r--tools/re2c/scanner.c748
-rw-r--r--tools/re2c/scanner.h44
-rw-r--r--tools/re2c/scanner.re241
-rw-r--r--tools/re2c/substr.c65
-rw-r--r--tools/re2c/substr.h89
-rw-r--r--tools/re2c/token.h30
-rw-r--r--tools/re2c/translate.c61
74 files changed, 14607 insertions, 0 deletions
diff --git a/tools/Makefile.inc b/tools/Makefile.inc
new file mode 100644
index 0000000..dbc67fc
--- /dev/null
+++ b/tools/Makefile.inc
@@ -0,0 +1,9 @@
+EXTRA_DIST += tools/re2c/Makefile.inc
+EXTRA_DIST += tools/genmacro/Makefile.inc
+EXTRA_DIST += tools/genperf/Makefile.inc
+EXTRA_DIST += tools/python-yasm/Makefile.inc
+
+include tools/re2c/Makefile.inc
+include tools/genmacro/Makefile.inc
+include tools/genperf/Makefile.inc
+include tools/python-yasm/Makefile.inc
diff --git a/tools/genmacro/Makefile.inc b/tools/genmacro/Makefile.inc
new file mode 100644
index 0000000..722f95d
--- /dev/null
+++ b/tools/genmacro/Makefile.inc
@@ -0,0 +1,14 @@
+# These utility programs must be built for the BUILD host when cross-compiling,
+# so the rules below deliberately bypass automake's standard program rules.
+
+noinst_PROGRAMS += genmacro
+
+genmacro_SOURCES =
+EXTRA_DIST += tools/genmacro/genmacro.c
+genmacro_LDADD = genmacro.$(OBJEXT)
+genmacro_LINK = $(CCLD_FOR_BUILD) -o $@
+
+genmacro.$(OBJEXT): tools/genmacro/genmacro.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/genmacro/genmacro.c || echo '$(srcdir)/'`tools/genmacro/genmacro.c
+
diff --git a/tools/genmacro/genmacro.c b/tools/genmacro/genmacro.c
new file mode 100644
index 0000000..8e702b8
--- /dev/null
+++ b/tools/genmacro/genmacro.c
@@ -0,0 +1,134 @@
+/*
+ *
+ * C version of NASM's macros.pl
+ *
+ * Copyright (C) 2004-2008 Peter Johnson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAXLINE 1024
+
+/*
+ * Convert one or more NASM-style macro files into a C source file that
+ * defines a NULL-terminated array of strings, one string per non-blank
+ * input line.
+ *
+ * Usage: genmacro <out> <var> <file> [<file> ...]
+ *   <out>   output C file (removed again if processing fails)
+ *   <var>   name of the generated "static const char *<var>[]" array
+ *   <file>  input macro files, processed in the order given
+ *
+ * For every input line, everything from the first ';' outside a quoted
+ * string up to the end of the line is dropped, leading and trailing
+ * whitespace is stripped, and backslashes and double quotes are escaped
+ * in the emitted string literal.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on a usage error, an allocation
+ * failure, a file that cannot be opened, an unterminated quote, or an
+ * error while writing the output file.
+ */
+int
+main(int argc, char *argv[])
+{
+    FILE *in = NULL, *out;
+    int i;
+    char *str;
+    char *strp;
+    char *charp;
+    int fline;
+    int write_failed;
+    size_t len;
+
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s <out> <var> <file> [<file> ...]\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    /* allocate before creating the output so a failure leaves no file */
+    str = malloc(MAXLINE);
+    if (!str) {
+        fprintf(stderr, "Out of memory.\n");
+        return EXIT_FAILURE;
+    }
+
+    out = fopen(argv[1], "wt");
+    if (!out) {
+        fprintf(stderr, "Could not open `%s'.\n", argv[1]);
+        free(str);
+        return EXIT_FAILURE;
+    }
+
+    fprintf(out, "/* This file auto-generated from standard.mac by genmacro.c"
+            " - don't edit it */\n\n#include <stddef.h>\n\n"
+            "static const char *%s[] = {\n", argv[2]);
+
+    for (i=3; i<argc; i++) {
+        in = fopen(argv[i], "rt");
+        if (!in) {
+            fprintf(stderr, "Could not open `%s'.\n", argv[i]);
+            goto error;
+        }
+
+        /* line number within the current input file, for diagnostics */
+        fline = 0;
+
+        while (fgets(str, MAXLINE, in)) {
+            fline++;
+
+            strp = str;
+
+            /* check for unterminated quotes and delete comments */
+            charp = strp;
+            while ((charp = strpbrk(charp, "'\";"))) {
+                if (charp[0] == ';') {
+                    *charp = '\0';
+                    break;
+                }
+                /* skip to the matching closing quote of the same kind */
+                if ((charp = strchr(charp+1, charp[0])) == NULL) {
+                    fprintf(stderr, "%s:%d: error: unterminated quote\n",
+                            argv[i], fline);
+                    goto error;
+                }
+                charp++;
+            }
+
+            /* strip off leading and trailing whitespace; '\r' is included
+             * so CRLF input does not leave a raw CR inside the literal
+             */
+            while (*strp == ' ' || *strp == '\t')
+                strp++;
+            len = strlen(strp);
+            while (len > 0 && (strp[len-1] == ' ' || strp[len-1] == '\t' ||
+                               strp[len-1] == '\n' || strp[len-1] == '\r')) {
+                strp[len-1] = '\0';
+                len--;
+            }
+
+            /* skip blank lines */
+            if (len == 0)
+                continue;
+
+            /* output as string to output file */
+            fprintf(out, " \"");
+            while (*strp != '\0') {
+                if (*strp == '\\' || *strp == '"')
+                    fputc('\\', out);
+                fputc(*strp, out);
+                strp++;
+            }
+            fprintf(out, "\",\n");
+        }
+
+        fclose(in);
+        in = NULL;
+    }
+
+    fprintf(out, " NULL\n};\n");
+
+    /* fclose() flushes buffered data, so check both for a failed write */
+    write_failed = ferror(out);
+    if (fclose(out) != 0)
+        write_failed = 1;
+    free(str);
+
+    if (write_failed) {
+        fprintf(stderr, "Could not write `%s'.\n", argv[1]);
+        remove(argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+
+error:
+    /* common failure exit: release everything and drop the partial output */
+    if (in)
+        fclose(in);
+    fclose(out);
+    remove(argv[1]);
+    free(str);
+    return EXIT_FAILURE;
+}
diff --git a/tools/genperf/Makefile.inc b/tools/genperf/Makefile.inc
new file mode 100644
index 0000000..135da6b
--- /dev/null
+++ b/tools/genperf/Makefile.inc
@@ -0,0 +1,42 @@
+# These utility programs must be built for the BUILD host when cross-compiling,
+# so the rules below deliberately bypass automake's standard program rules.
+
+noinst_PROGRAMS += genperf
+
+# Suffix rule for genperf
+SUFFIXES += .gperf
+.gperf.c: genperf$(EXEEXT)
+ $(top_builddir)/genperf$(EXEEXT) $< $@
+
+genperf_SOURCES =
+EXTRA_DIST += tools/genperf/genperf.c
+EXTRA_DIST += tools/genperf/perfect.c
+EXTRA_DIST += tools/genperf/perfect.h
+EXTRA_DIST += tools/genperf/standard.h
+genperf_LDADD = genperf.$(OBJEXT)
+genperf_LDADD += gp-perfect.$(OBJEXT)
+genperf_LDADD += gp-phash.$(OBJEXT)
+genperf_LDADD += gp-xmalloc.$(OBJEXT)
+genperf_LDADD += gp-xstrdup.$(OBJEXT)
+genperf_LINK = $(CCLD_FOR_BUILD) -o $@
+
+genperf.$(OBJEXT): tools/genperf/genperf.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/genperf/genperf.c || echo '$(srcdir)/'`tools/genperf/genperf.c
+
+gp-perfect.$(OBJEXT): tools/genperf/perfect.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/genperf/perfect.c || echo '$(srcdir)/'`tools/genperf/perfect.c
+
+gp-phash.$(OBJEXT): libyasm/phash.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f libyasm/phash.c || echo '$(srcdir)/'`libyasm/phash.c
+
+gp-xmalloc.$(OBJEXT): libyasm/xmalloc.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f libyasm/xmalloc.c || echo '$(srcdir)/'`libyasm/xmalloc.c
+
+gp-xstrdup.$(OBJEXT): libyasm/xstrdup.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f libyasm/xstrdup.c || echo '$(srcdir)/'`libyasm/xstrdup.c
+
diff --git a/tools/genperf/genperf.c b/tools/genperf/genperf.c
new file mode 100644
index 0000000..c3cfa76
--- /dev/null
+++ b/tools/genperf/genperf.c
@@ -0,0 +1,540 @@
+/*
+ *
+ * Generate Minimal Perfect Hash (genperf)
+ *
+ * Copyright (C) 2006-2007 Peter Johnson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <string.h>
+#include "tools/genperf/perfect.h"
+#include "libyasm/compat-queue.h"
+#include "libyasm/coretype.h"
+#include "libyasm/errwarn.h"
+
+typedef STAILQ_HEAD(slist, sval) slist; /* queue head for a list of sval nodes */
+typedef struct sval {
+ STAILQ_ENTRY(sval) link; /* queue linkage */
+ char *str; /* one chunk of text copied verbatim to the output */
+} sval;
+
+typedef STAILQ_HEAD(keyword_list, keyword) keyword_list; /* queue head for keyword nodes */
+typedef struct keyword {
+ STAILQ_ENTRY(keyword) link; /* queue linkage */
+ char *name; /* keyword text: the input line up to the first delimiter */
+ char *args; /* rest of the input line from that delimiter on, EOL stripped */
+ unsigned int line; /* value of cur_line when the keyword was read */
+} keyword;
+
+static unsigned int cur_line = 1; /* input line counter, printed by report_error() */
+static int errors = 0; /* number of report_error() calls so far */
+
+/*
+ * Print a printf-style diagnostic to stderr, prefixed with the current
+ * input line number and terminated by a newline, and count it in `errors'.
+ */
+static void
+report_error(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    fprintf(stderr, "%u: ", cur_line);
+    vfprintf(stderr, fmt, args);
+    fputc('\n', stderr);
+    va_end(args);
+
+    ++errors;
+}
+
+/*
+ * Fatal-error hook for this build tool: print the printf-style `message'
+ * to stderr, then abort().  Never returns.
+ *
+ * NOTE(review): presumably called by the libyasm helpers linked into
+ * genperf (xmalloc/xstrdup) on allocation failure - confirm against
+ * libyasm.  Previously the message was dropped, making such aborts silent.
+ */
+void
+yasm__fatal(const char *message, ...)
+{
+    va_list va;
+
+    fputs("genperf: fatal: ", stderr);
+    if (message) {
+        va_start(va, message);
+        vfprintf(stderr, message, va);
+        va_end(va);
+    }
+    fputc('\n', stderr);
+
+    abort();
+}
+
+/*
+ * Write the C source of the lookup tables for the generated perfect hash
+ * to `f': first scramble[] (only when blen >= USE_SCRAMBLE), then tab[]
+ * (only when blen > 0).  Below USE_SCRAMBLE the tab[] entries are written
+ * already mapped through scramble[]; otherwise the raw val_b values are
+ * written.
+ */
+static void
+make_c_tab(
+    FILE *f,
+    bstuff *tab,        /* table indexed by b */
+    ub4 smax,           /* range of scramble[] */
+    ub4 blen,           /* b in 0..blen-1, power of 2 */
+    ub4 *scramble)      /* used in final hash */
+{
+    ub4 base;           /* index of the first entry on the current row */
+    ub4 col;            /* position within the current row */
+
+    /* table for the mapping for the perfect hash */
+    if (blen >= USE_SCRAMBLE) {
+        /* A way to make the 1-byte values in tab bigger */
+        if (smax > UB2MAXVAL+1) {
+            /* four entries per row */
+            fprintf(f, " static const unsigned long scramble[] = {\n");
+            for (base=0; base<=UB1MAXVAL; base+=4) {
+                for (col=0; col<4; col++)
+                    fprintf(f, " 0x%.8lx,", scramble[base+col]);
+                fprintf(f, "\n");
+            }
+        } else {
+            /* eight entries per row */
+            fprintf(f, " static const unsigned short scramble[] = {\n");
+            for (base=0; base<=UB1MAXVAL; base+=8) {
+                for (col=0; col<8; col++)
+                    fprintf(f, " 0x%.4lx,", scramble[base+col]);
+                fprintf(f, "\n");
+            }
+        }
+        fprintf(f, " };\n");
+        fprintf(f, "\n");
+    }
+
+    /* nothing more to emit without a tab[] */
+    if (blen == 0)
+        return;
+
+    /* small adjustments to _a_ to make values distinct */
+    if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+        fprintf(f, " static const unsigned char ");
+    else
+        fprintf(f, " static const unsigned short ");
+    fprintf(f, "tab[] = {\n");
+
+    if (blen < 16) {
+        /* short table: everything on one line, no row breaks */
+        for (base=0; base<blen; ++base)
+            fprintf(f, "%3ld,", scramble[tab[base].val_b]);
+    } else if (blen <= 1024) {
+        /* sixteen pre-scrambled entries per row */
+        for (base=0; base<blen; base+=16) {
+            fprintf(f, " ");
+            for (col=0; col<16; col++)
+                fprintf(f, "%ld,", scramble[tab[base+col].val_b]);
+            fprintf(f, "\n");
+        }
+    } else if (blen < USE_SCRAMBLE) {
+        /* eight pre-scrambled entries per row */
+        for (base=0; base<blen; base+=8) {
+            fprintf(f, " ");
+            for (col=0; col<8; col++)
+                fprintf(f, "%ld,", scramble[tab[base+col].val_b]);
+            fprintf(f, "\n");
+        }
+    } else {
+        /* raw val_b values, sixteen per row; scramble[] is applied by the
+         * generated code instead
+         */
+        for (base=0; base<blen; base+=16) {
+            fprintf(f, " ");
+            for (col=0; col<16; col++)
+                fprintf(f, "%d,", tab[base+col].val_b);
+            fprintf(f, "\n");
+        }
+    }
+    fprintf(f, " };\n");
+    fprintf(f, "\n");
+}
+
+/*
+ * Generate the C lookup function for the keywords in `kws' and write it
+ * to `out'.
+ *
+ *   lookup_function_name  name of the emitted function
+ *   struct_name           tag of the struct type the function returns
+ *   kws                   keywords (name, trailing initializer text, line)
+ *   filename              input file name used in emitted #line directives
+ *
+ * The emitted function contains a pd[] array with one initializer per
+ * hash slot, the tables written by make_c_tab(), the hash code produced
+ * by findhash(), and a final strcmp() check of the candidate entry.
+ * A keyword that cannot be matched back to `kws' is reported through
+ * report_error() and ends the pd[] output early.
+ */
+static void
+perfect_gen(FILE *out, const char *lookup_function_name,
+            const char *struct_name, keyword_list *kws,
+            const char *filename)
+{
+    ub4 nkeys = 0;
+    key *keys = NULL;
+    hashform form;
+    bstuff *tab;                    /* table indexed by b */
+    hstuff *tabh;                   /* table indexed by hash value */
+    ub4 smax;                       /* scramble[] values in 0..smax-1, a power of 2 */
+    ub4 alen;                       /* a in 0..alen-1, a power of 2 */
+    ub4 blen;                       /* b in 0..blen-1, a power of 2 */
+    ub4 salt;                       /* a parameter to the hash function */
+    gencode final;                  /* code for final hash */
+    ub4 i;
+    ub4 scramble[SCRAMBLE_LEN];     /* used in final hash function */
+    char buf[10][80];               /* storage for the generated code lines */
+    char *buf2[10];                 /* line pointers into buf */
+    keyword *kw;
+
+    /* perfect hash configuration */
+    form.mode = NORMAL_HM;
+    form.hashtype = STRING_HT;
+    form.perfect = MINIMAL_HP;
+    form.speed = SLOW_HS;
+
+    /* hand findhash() ten empty 80-byte lines for the final hash code */
+    for (i=0; i<10; i++)
+        buf2[i] = buf[i];
+    final.line = buf2;
+    final.used = 0;
+    final.len = 10;
+
+    /* build the key list; each new key is pushed on the front */
+    STAILQ_FOREACH(kw, kws, link) {
+        key *node = yasm_xmalloc(sizeof(key));
+
+        node->name_k = yasm__xstrdup(kw->name);
+        node->len_k = (ub4)strlen(kw->name);
+        node->next_k = keys;
+        keys = node;
+        nkeys++;
+    }
+
+    /* find the hash */
+    findhash(&tab, &tabh, &alen, &blen, &salt, &final,
+             scramble, &smax, keys, nkeys, &form);
+
+    /* The hash function beginning */
+    fprintf(out, "static const struct %s *\n", struct_name);
+    fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name);
+    fprintf(out, "{\n");
+
+    /* output the dir table: this should loop up to smax for NORMAL_HP,
+     * or up to pakd.nkeys for MINIMAL_HP.
+     */
+    fprintf(out, " static const struct %s pd[%lu] = {\n", struct_name, nkeys);
+    for (i=0; i<nkeys; i++) {
+        if (!tabh[i].key_h) {
+            /* unused slot */
+            fprintf(out, " { NULL }");
+        } else {
+            /* map the hashed key back to its keyword for args and line */
+            STAILQ_FOREACH(kw, kws, link) {
+                if (strcmp(kw->name, tabh[i].key_h->name_k) == 0)
+                    break;
+            }
+            if (!kw) {
+                report_error("internal error: could not find `%s'",
+                             tabh[i].key_h->name_k);
+                break;
+            }
+            fprintf(out, "#line %u \"%s\"\n", kw->line, filename);
+            fprintf(out, " {\"%s\"%s}", kw->name, kw->args);
+        }
+
+        /* every entry but the last is followed by a comma */
+        if (i+1 < nkeys)
+            fprintf(out, ",");
+        fprintf(out, "\n");
+    }
+    fprintf(out, " };\n");
+
+    /* output the hash tab[] array */
+    make_c_tab(out, tab, smax, blen, scramble);
+
+    /* The hash function body */
+    fprintf(out, " const struct %s *ret;\n", struct_name);
+    for (i=0; i<final.used; ++i)
+        fprintf(out, "%s", final.line[i]);
+    fprintf(out, " if (rsl >= %lu) return NULL;\n", nkeys);
+    fprintf(out, " ret = &pd[rsl];\n");
+    fprintf(out, " if (strcmp(key, ret->name) != 0) return NULL;\n");
+    fprintf(out, " return ret;\n");
+    fprintf(out, "}\n");
+    fprintf(out, "\n");
+
+    free(tab);
+    free(tabh);
+}
+
+/*
+ * genperf entry point.
+ *
+ * Usage: genperf <in> <out>
+ *
+ * Reads a gperf-style input file made of three parts separated by lines
+ * starting with "%%":
+ *   1. declarations: '%' options, %{ ... %} verbatim code and at most one
+ *      struct definition (only allowed after %struct-type);
+ *   2. keywords: one per line, the name ending at the first delimiter and
+ *      the rest of the line kept as initializer text;
+ *   3. optional trailing code, copied verbatim.
+ * Lines starting with '#' in the first two parts are comments.
+ *
+ * Writes <out> containing the verbatim code, the lookup function produced
+ * by perfect_gen() and the trailing code.  #line directives referring to
+ * <in> (with backslashes turned into slashes) are emitted for copied code.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on usage, open, parse or write
+ * errors; <out> is removed again if errors were reported while generating
+ * or if it could not be written completely.
+ */
+int
+main(int argc, char *argv[])
+{
+    FILE *in, *out;
+    size_t i;
+    char *ch;
+    static char line[1024], tmp[1024];
+    static char struct_name[128] = "";
+    static char lookup_function_name[128] = "in_word_set";
+    static char language[16] = "";
+    static char delimiters[16] = ",\r\n";
+    static char name[128];
+    static char filename[768];
+    int need_struct = 0;
+    int have_struct = 0;
+    int go_keywords = 0;
+    /* the next three options are recognized but not acted on here */
+    int ignore_case = 0;
+    int compare_strncmp = 0;
+    int readonly_tables = 0;
+    slist usercode, usercode2;
+    keyword_list keywords;
+    sval *sv;
+    keyword *kw;
+
+    if (argc != 3) {
+        fprintf(stderr, "Usage: genperf <in> <out>\n");
+        return EXIT_FAILURE;
+    }
+
+    in = fopen(argv[1], "rt");
+    if (!in) {
+        fprintf(stderr, "Could not open `%s' for reading\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    /* copy the input name for #line directives, turning '\' into '/' */
+    ch = argv[1];
+    i = 0;
+    while (*ch && i < 767) {
+        if (*ch == '\\') {
+            filename[i++] = '/';
+            ch++;
+        } else
+            filename[i++] = *ch++;
+    }
+    filename[i] = '\0';
+
+    STAILQ_INIT(&usercode);
+    STAILQ_INIT(&usercode2);
+    STAILQ_INIT(&keywords);
+
+    /* Parse declarations section.  Throughout, cur_line is the number of
+     * the line being processed and is advanced once per consumed line.
+     */
+    while (fgets(line, sizeof(line), in)) {
+        /* Comments start with # as the first thing on a line */
+        if (line[0] == '#') {
+            cur_line++;
+            continue;
+        }
+
+        /* Handle structure declaration */
+        if (strncmp(line, "struct", 6) == 0) {
+            int braces;
+
+            if (!need_struct) {
+                report_error("struct without %%struct-type declaration");
+                return EXIT_FAILURE;
+            }
+            if (have_struct) {
+                report_error("more than one struct declaration");
+                return EXIT_FAILURE;
+            }
+            have_struct = 1;
+
+            /* copy struct name; <ctype.h> functions need unsigned char */
+            ch = &line[6];
+            while (isspace((unsigned char)*ch))
+                ch++;
+            i = 0;
+            while ((isalnum((unsigned char)*ch) || *ch == '_') && i < 127)
+                struct_name[i++] = *ch++;
+            if (i == 127) {
+                report_error("struct name too long");
+                return EXIT_FAILURE;
+            }
+            struct_name[i] = '\0';
+
+            sv = yasm_xmalloc(sizeof(sval));
+            sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename);
+            sv->str = yasm__xstrdup(tmp);
+            STAILQ_INSERT_TAIL(&usercode, sv, link);
+
+            braces = 0;
+            do {
+                /* count braces to determine when we're done */
+                ch = line;
+                while (*ch != '\0') {
+                    if (*ch == '{')
+                        braces++;
+                    if (*ch == '}')
+                        braces--;
+                    ch++;
+                }
+                sv = yasm_xmalloc(sizeof(sval));
+                sv->str = yasm__xstrdup(line);
+                STAILQ_INSERT_TAIL(&usercode, sv, link);
+                cur_line++;
+                if (braces <= 0)
+                    break;
+            } while (fgets(line, sizeof(line), in));
+            /* every struct line was already counted inside the loop; a
+             * further increment here would skew all later line numbers
+             */
+            continue;
+        }
+
+        /* Ignore non-declaration lines */
+        if (line[0] != '%') {
+            cur_line++;
+            continue;
+        }
+
+        /* %% terminates declarations section */
+        if (line[1] == '%') {
+            if (need_struct && !have_struct) {
+                report_error("%%struct-type declaration, but no struct found");
+                return EXIT_FAILURE;
+            }
+            go_keywords = 1;
+            /* count the %% line so the first keyword gets its own number */
+            cur_line++;
+            break;  /* move on to keywords section */
+        }
+
+        /* %{ begins a verbatim code section that ends with %} */
+        if (line[1] == '{') {
+            /* the extra blank line stands in for the %{ line itself */
+            sv = yasm_xmalloc(sizeof(sval));
+            sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
+            sv->str = yasm__xstrdup(tmp);
+            STAILQ_INSERT_TAIL(&usercode, sv, link);
+
+            while (fgets(line, sizeof(line), in)) {
+                cur_line++;
+                if (line[0] == '%' && line[1] == '}')
+                    break;
+                sv = yasm_xmalloc(sizeof(sval));
+                sv->str = yasm__xstrdup(line);
+                STAILQ_INSERT_TAIL(&usercode, sv, link);
+            }
+            cur_line++;
+            continue;
+        }
+
+        if (strncmp(&line[1], "ignore-case", 11) == 0) {
+            ignore_case = 1;
+        } else if (strncmp(&line[1], "compare-strncmp", 15) == 0) {
+            compare_strncmp = 1;
+        } else if (strncmp(&line[1], "readonly-tables", 15) == 0) {
+            readonly_tables = 1;
+        } else if (strncmp(&line[1], "language=", 9) == 0) {
+            /* copy up to end of line (or end of string on a final line
+             * that has no newline)
+             */
+            ch = &line[10];
+            i = 0;
+            while (*ch != '\n' && *ch != '\r' && *ch != '\0' && i<15)
+                language[i++] = *ch++;
+            language[i] = '\0';
+        } else if (strncmp(&line[1], "delimiters=", 11) == 0) {
+            /* the line's own EOL characters are copied on purpose: the
+             * keyword scan below relies on them to end a name that has no
+             * other delimiter.  Stop at the terminator rather than copying
+             * stale buffer contents.
+             */
+            ch = &line[12];
+            i = 0;
+            while (*ch != '\0' && i<15)
+                delimiters[i++] = *ch++;
+            delimiters[i] = '\0';
+        } else if (strncmp(&line[1], "enum", 4) == 0) {
+            /* unused */
+        } else if (strncmp(&line[1], "struct-type", 11) == 0) {
+            need_struct = 1;
+        } else if (strncmp(&line[1], "define", 6) == 0) {
+            /* Several different defines we need to handle */
+            ch = &line[7];
+            while (isspace((unsigned char)*ch))
+                ch++;
+
+            if (strncmp(ch, "hash-function-name", 18) == 0) {
+                /* unused */
+            } else if (strncmp(ch, "lookup-function-name", 20) == 0) {
+                /* continue from the matched word, not from a fixed column,
+                 * so extra whitespace after %define is tolerated
+                 */
+                ch += 20;
+                while (isspace((unsigned char)*ch))
+                    ch++;
+                i = 0;
+                while ((isalnum((unsigned char)*ch) || *ch == '_') && i < 127)
+                    lookup_function_name[i++] = *ch++;
+                if (i == 127) {
+                    report_error("lookup function name too long");
+                    return EXIT_FAILURE;
+                }
+                lookup_function_name[i] = '\0';
+            } else {
+                fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line,
+                        line);
+            }
+        } else {
+            fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line,
+                    line);
+        }
+
+        cur_line++;
+    }
+
+    if (!go_keywords) {
+        report_error("no keywords section found");
+        return EXIT_FAILURE;
+    }
+
+    /* Parse keywords section */
+    while (fgets(line, sizeof(line), in)) {
+        char *d;
+
+        /* Comments start with # as the first thing on a line */
+        if (line[0] == '#') {
+            cur_line++;
+            continue;
+        }
+
+        /* Keywords section terminated with %%; the line is not counted
+         * here because the blank line emitted after the #line directive
+         * below takes its place
+         */
+        if (line[0] == '%' && line[1] == '%')
+            break;
+
+        /* Look for name; strchr() also matches the terminating NUL, so
+         * the scan cannot run past the end of the line
+         */
+        ch = &line[0];
+        i = 0;
+        while (strchr(delimiters, *ch) == NULL && i < 127)
+            name[i++] = *ch++;
+        if (i == 127) {
+            report_error("keyword name too long");
+            return EXIT_FAILURE;
+        }
+        name[i] = '\0';
+
+        /* Strip EOL */
+        d = strrchr(ch, '\n');
+        if (d)
+            *d = '\0';
+        d = strrchr(ch, '\r');
+        if (d)
+            *d = '\0';
+        kw = yasm_xmalloc(sizeof(keyword));
+        kw->name = yasm__xstrdup(name);
+        kw->args = yasm__xstrdup(ch);
+        kw->line = cur_line;
+        STAILQ_INSERT_TAIL(&keywords, kw, link);
+        cur_line++;
+    }
+
+    if (errors > 0)
+        return EXIT_FAILURE;
+
+    /* Pull in any end code */
+    if (!feof(in)) {
+        sv = yasm_xmalloc(sizeof(sval));
+        sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename);
+        sv->str = yasm__xstrdup(tmp);
+        STAILQ_INSERT_TAIL(&usercode2, sv, link);
+
+        while (fgets(line, sizeof(line), in)) {
+            sv = yasm_xmalloc(sizeof(sval));
+            sv->str = yasm__xstrdup(line);
+            STAILQ_INSERT_TAIL(&usercode2, sv, link);
+        }
+    }
+
+    /* the input has been read completely */
+    fclose(in);
+
+    /* output code */
+    out = fopen(argv[2], "wt");
+    if (!out) {
+        fprintf(stderr, "Could not open `%s' for writing\n", argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    fprintf(out, "/* %s code produced by genperf */\n", language);
+    fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]);
+
+    STAILQ_FOREACH(sv, &usercode, link)
+        fprintf(out, "%s", sv->str);
+
+    /* Get perfect hash */
+    perfect_gen(out, lookup_function_name, struct_name, &keywords, filename);
+
+    STAILQ_FOREACH(sv, &usercode2, link)
+        fprintf(out, "%s", sv->str);
+
+    /* fclose() flushes, so a failure here means the output is incomplete */
+    if (fclose(out) != 0) {
+        fprintf(stderr, "Could not write `%s'\n", argv[2]);
+        remove(argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    if (errors > 0) {
+        remove(argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
diff --git a/tools/genperf/perfect.c b/tools/genperf/perfect.c
new file mode 100644
index 0000000..7cd6867
--- /dev/null
+++ b/tools/genperf/perfect.c
@@ -0,0 +1,1178 @@
+/* Modified for use with yasm by Peter Johnson. */
+/*
+------------------------------------------------------------------------------
+perfect.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996, December 1999
+You may use this code in any way you wish, and it is free. No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.c
+
+This generates a minimal perfect hash function. That means, given a
+set of n keys, this determines a hash function that maps each of
+those keys into a value in 0..n-1 with no collisions.
+
+The perfect hash function first uses a normal hash function on the key
+to determine (a,b) such that the pair (a,b) is distinct for all
+keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
+tab[] is an array of 1-byte values and scramble[] is a 256-term array of
+2-byte or 4-byte values. If there are n keys, the length of tab[] is a
+power of two between n/3 and n.
+
+I found the idea of computing distinct (a,b) values in "Practical minimal
+perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
+ Communications of the ACM, January 1992. They found the idea in Cichelli
+(CACM Jan 1980). Beyond that, our methods differ.
+
+The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
+0..*blen*-1. A fast hash function determines both a and b
+simultaneously. Any decent hash function is likely to produce
+hashes so that (a,b) is distinct for all pairs. I try the hash
+using different values of *salt* until all pairs are distinct.
+
+The final hash is (a XOR scramble[tab[b]]). *scramble* is a
+predetermined mapping of 0..255 into 0..smax-1. *tab* is an
+array that we fill in in such a way as to make the hash perfect.
+
+First we fill in all values of *tab* that are used by more than one
+key. We try all possible values for each position until one works.
+
+This leaves m unmapped keys and m values that something could hash to.
+If you treat unmapped keys as lefthand nodes and unused hash values
+as righthand nodes, and draw a line connecting each key to each hash
+value it could map to, you get a bipartite graph. We attempt to
+find a perfect matching in this graph. If we succeed, we have
+determined a perfect hash for the whole set of keys.
+
+*scramble* is used because (a^tab[i]) clusters keys around *a*.
+------------------------------------------------------------------------------
+*/
+
+#include <string.h>
+#include "tools/genperf/standard.h"
+#include "libyasm/coretype.h"
+#include "libyasm/phash.h"
+#include "tools/genperf/perfect.h"
+
+#define CHECKSTATE 8
+
+/*
+------------------------------------------------------------------------------
+Find the mapping that will produce a perfect hash
+------------------------------------------------------------------------------
+*/
+
+ /*
+  * Return the ceiling of the log (base 2) of val, i.e. the smallest i such
+  * that (1<<i) >= val.  Both 0 and 1 yield 0.
+  *
+  * The shift count is capped at the bit width of ub4 (assuming 8-bit bytes)
+  * so that a val larger than the highest representable power of two ends the
+  * loop with i == width instead of shifting by the full width of the type.
+  */
+ ub4 phash_log2(ub4 val)
+ {
+     const ub4 width = (ub4)(sizeof(ub4) * 8);
+     ub4 i = 0;
+
+     while (i < width && ((ub4)1 << i) < val)
+         ++i;
+     return i;
+ }
+
+ /*
+  * Compute p(x), where p is a permutation of 0..(1<<nbits)-1.
+  * permute(0)=0.  This is intended and useful.
+  *
+  * The mask and shift distances are kept in ub4 (the type of x and nbits)
+  * rather than int, so that a wide mask is never squeezed through a signed
+  * type before being and-ed with x.
+  */
+ static ub4 permute(
+     ub4 x,          /* input, a value in some range */
+     ub4 nbits)      /* input, number of bits in range */
+ {
+     int i;
+     const ub4 mask   = ((ub4)1<<nbits)-1;   /* all ones */
+     const ub4 const2 = 1+nbits/2;
+     const ub4 const3 = 1+nbits/3;
+     const ub4 const4 = 1+nbits/4;
+     const ub4 const5 = 1+nbits/5;
+
+     /* 20 rounds of add-shift / xor-shift mixing, masked back into range */
+     for (i=0; i<20; ++i)
+     {
+         x = (x+(x<<const2)) & mask;
+         x = (x^(x>>const3));
+         x = (x+(x<<const4)) & mask;
+         x = (x^(x>>const5));
+     }
+     return x;
+ }
+
+ /*
+  * Initialize scramble[]: entry i receives permute(i, phash_log2(smax)) for
+  * every i in 0..SCRAMBLE_LEN-1.
+  *
+  * The bit count depends only on smax, so it is computed once up front
+  * instead of once per table entry.
+  */
+ static void scrambleinit(
+     ub4 *scramble,  /* hash is a^scramble[tab[b]] */
+     ub4 smax)       /* scramble values should be in 0..smax-1 */
+ {
+     const ub4 nbits = phash_log2(smax);
+     ub4 i;
+
+     for (i=0; i<SCRAMBLE_LEN; ++i)
+     {
+         scramble[i] = permute(i, nbits);
+     }
+ }
+
+/*
+ * Check if key1 and key2 are the same.
+ * We already checked (a,b) are the same.
+ */
+static void checkdup(
+ key *key1,
+ key *key2,
+ hashform *form)
+{
+ switch(form->hashtype)
+ {
+ case STRING_HT:
+ if ((key1->len_k == key2->len_k) &&
+ !memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k))
+ {
+ fprintf(stderr, "perfect.c: Duplicates keys! %.*s\n",
+ (int)key1->len_k, key1->name_k);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case INT_HT:
+ if (key1->hash_k == key2->hash_k)
+ {
+ fprintf(stderr, "perfect.c: Duplicate keys! %.8lx\n", key1->hash_k);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case AB_HT:
+ fprintf(stderr, "perfect.c: Duplicate keys! %.8lx %.8lx\n",
+ key1->a_k, key1->b_k);
+ exit(EXIT_FAILURE);
+ break;
+ default:
+ fprintf(stderr, "perfect.c: Illegal hash type %ld\n", (ub4)form->hashtype);
+ exit(EXIT_FAILURE);
+ break;
+ }
+}
+
+
+/*
+ * put keys in tabb according to key->b_k
+ * check if the initial hash might work
+ */
+static int inittab(
+ bstuff *tabb, /* output, list of keys with b for (a,b) */
+ ub4 blen, /* length of tabb */
+ key *keys, /* list of keys already hashed */
+ hashform *form, /* user directives */
+ int complete) /* TRUE means to complete init despite collisions */
+{
+ int nocollision = TRUE;
+ key *mykey;
+
+ memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
+
+ /* Two keys with the same (a,b) guarantees a collision */
+ for (mykey=keys; mykey; mykey=mykey->next_k)
+ {
+ key *otherkey;
+
+ for (otherkey=tabb[mykey->b_k].list_b;
+ otherkey;
+ otherkey=otherkey->nextb_k)
+ {
+ if (mykey->a_k == otherkey->a_k)
+ {
+ nocollision = FALSE;
+ checkdup(mykey, otherkey, form);
+ if (!complete)
+ return FALSE;
+ }
+ }
+ ++tabb[mykey->b_k].listlen_b;
+ mykey->nextb_k = tabb[mykey->b_k].list_b;
+ tabb[mykey->b_k].list_b = mykey;
+ }
+
+ /* no two keys have the same (a,b) pair */
+ return nocollision;
+}
+
+
+ /*
+  * Do the initial hash for normal mode (use lookup and checksum).
+  *
+  * Every key gets its (a_k, b_k) pair, and the C text that computes the same
+  * pair at run time is written into final->line[].  When the bits needed for
+  * a and b do not fit in one UB4BITS-wide word, phash_checksum() supplies two
+  * words; otherwise a single phash_lookup() value is split, with a taken
+  * from the high bits and b from the low bits.
+  */
+ static void initnorm(
+     key *keys,          /* list of all keys */
+     ub4 alen,           /* (a,b) has a in 0..alen-1, a power of 2 */
+     ub4 blen,           /* (a,b) has b in 0..blen-1, a power of 2 */
+     ub4 smax,           /* maximum range of computable hash values */
+     ub4 salt,           /* used to initialize the hash function */
+     gencode *final)     /* output, code for the final hash */
+ {
+     ub4 loga = phash_log2(alen);    /* log base 2 of alen */
+     ub4 logb = phash_log2(blen);    /* log base 2 of blen */
+     /* the golden ratio; an arbitrary value */
+     ub4 initlev = (salt*0x9e3779b9)&0xffffffff;
+     key *k;
+
+     if (loga+logb <= UB4BITS)
+     {
+         /* one hash word is enough */
+         for (k = keys; k; k = k->next_k)
+         {
+             ub4 hash = phash_lookup(k->name_k, k->len_k, initlev);
+
+             if (loga > 0)
+                 k->a_k = hash>>(UB4BITS-loga);
+             else
+                 k->a_k = 0;
+
+             if (blen > 1)
+                 k->b_k = hash&(blen-1);
+             else
+                 k->b_k = 0;
+         }
+
+         final->used = 2;
+         sprintf(final->line[0],
+                 " unsigned long rsl, val = phash_lookup(key, len, 0x%lxUL);\n", initlev);
+         if (smax <= 1)
+         {
+             sprintf(final->line[1], " rsl = 0;\n");
+         }
+         else if (blen < USE_SCRAMBLE)
+         {
+             sprintf(final->line[1], " rsl = ((val>>%ld)^tab[val&0x%lx]);\n",
+                     UB4BITS-loga, blen-1);
+         }
+         else
+         {
+             sprintf(final->line[1], " rsl = ((val>>%ld)^scramble[tab[val&0x%lx]]);\n",
+                     UB4BITS-loga, blen-1);
+         }
+         return;
+     }
+
+     /* a and b together need more than one word: use the checksum */
+     for (k = keys; k; k = k->next_k)
+     {
+         ub4 state[CHECKSTATE];
+         ub4 s;
+
+         for (s = 0; s < CHECKSTATE; ++s)
+             state[s] = initlev;
+         phash_checksum(k->name_k, k->len_k, state);
+         k->a_k = state[0]&(alen-1);
+         k->b_k = state[1]&(blen-1);
+     }
+
+     final->used = 4;
+     sprintf(final->line[0],
+             " unsigned long i,state[CHECKSTATE],rsl;\n");
+     sprintf(final->line[1],
+             " for (i=0; i<CHECKSTATE; ++i) state[i]=0x%lx;\n",initlev);
+     sprintf(final->line[2],
+             " phash_checksum(key, len, state);\n");
+     sprintf(final->line[3],
+             " rsl = ((state[0]&0x%lx)^scramble[tab[state[1]&0x%lx]]);\n",
+             alen-1, blen-1);
+ }
+
+
+
+ /*
+  * Do initial hash for inline mode.
+  *
+  * Each key is hashed byte by byte with a small rotate-and-add mix seeded
+  * from salt; the result is kept in hash_k, a is taken from its low bits and
+  * b from its high bits.  One line of C text that derives rsl from an
+  * already computed `val` is written to final->line[0].
+  */
+ static void initinl(
+     key *keys,          /* list of all keys */
+     ub4 alen,           /* (a,b) has a in 0..alen-1, a power of 2 */
+     ub4 blen,           /* (a,b) has b in 0..blen-1, a power of 2 */
+     ub4 smax,           /* range of computable hash values */
+     ub4 salt,           /* used to initialize the hash function */
+     gencode *final)     /* generated code for final hash */
+ {
+     const ub4 amask = alen-1;
+     const ub4 blog = phash_log2(blen);
+     /* the golden ratio; an arbitrary value */
+     const ub4 initval = salt*0x9e3779b9;
+     key *k = keys;
+
+     /* It's more important to have b uniform than a, so b is the low bits */
+     while (k != (key *)0)
+     {
+         ub4 hash = initval;
+         ub4 pos;
+
+         for (pos = 0; pos < k->len_k; ++pos)
+         {
+             ub4 mixed = (ub1)k->name_k[pos] ^ hash;
+             ub4 rotated = (hash<<(UB4BITS-6))+(hash>>6);
+
+             hash = mixed + rotated;
+         }
+
+         k->hash_k = hash;
+
+         if (alen > 1)
+             k->a_k = hash & amask;
+         else
+             k->a_k = 0;
+
+         if (blen > 1)
+             k->b_k = hash >> (UB4BITS-blog);
+         else
+             k->b_k = 0;
+
+         k = k->next_k;
+     }
+
+     final->used = 1;
+     if (smax <= 1)
+     {
+         sprintf(final->line[0], " unsigned long rsl = 0;\n");
+         return;
+     }
+
+     if (blen < USE_SCRAMBLE)
+     {
+         sprintf(final->line[0], " unsigned long rsl = ((val & 0x%lx) ^ tab[val >> %ld]);\n",
+                 amask, UB4BITS-blog);
+     }
+     else
+     {
+         sprintf(final->line[0], " unsigned long rsl = ((val & 0x%lx) ^ scramble[tab[val >> %ld]]);\n",
+                 amask, UB4BITS-blog);
+     }
+ }
+
+
+/*
+ * Run a hash function on the key to get a and b
+ * Returns:
+ * 0: didn't find distinct (a,b) for all keys
+ * 1: found distinct (a,b) for all keys, put keys in tabb[]
+ * 2: found a perfect hash, no need to do any more work
+ */
+static ub4 initkey(
+ key *keys, /* list of all keys */
+ ub4 nkeys, /* total number of keys */
+ bstuff *tabb, /* stuff indexed by b */
+ ub4 alen, /* (a,b) has a in 0..alen-1, a power of 2 */
+ ub4 blen, /* (a,b) has b in 0..blen-1, a power of 2 */
+ ub4 smax, /* range of computable hash values */
+ ub4 salt, /* used to initialize the hash function */
+ hashform *form, /* user directives */
+ gencode *final) /* code for final hash */
+{
+ /* Do the initial hash of the keys */
+ switch(form->mode)
+ {
+ case NORMAL_HM:
+ initnorm(keys, alen, blen, smax, salt, final);
+ break;
+ case INLINE_HM:
+ initinl(keys, alen, blen, smax, salt, final);
+ break;
+#if 0
+ case HEX_HM:
+ case DECIMAL_HM:
+ finished = inithex(keys, nkeys, alen, blen, smax, salt, final, form);
+ if (finished) return 2;
+ break;
+#endif
+ default:
+ fprintf(stderr, "fatal error: illegal mode\n");
+ exit(1);
+ }
+
+ if (nkeys <= 1)
+ {
+ final->used = 1;
+ sprintf(final->line[0], " unsigned long rsl = 0;\n");
+ return 2;
+ }
+
+ return inittab(tabb, blen, keys, form, FALSE);
+}
+
+ /*
+  * Print an error message and exit if there are duplicates.
+  *
+  * tabb is rebuilt in full (collisions do not stop the rebuild), then every
+  * pair of keys sharing a b is handed to checkdup(), which exits the process
+  * when the two keys are the same.
+  */
+ static void duplicates(
+     bstuff *tabb,       /* array of lists of keys with the same b */
+     ub4 blen,           /* length of tabb, a power of 2 */
+     key *keys,
+     hashform *form)     /* user directives */
+ {
+     ub4 b;
+
+     (void)inittab(tabb, blen, keys, form, TRUE);
+
+     for (b = 0; b < blen; ++b)
+     {
+         key *first = tabb[b].list_b;
+
+         while (first)
+         {
+             key *second = first->nextb_k;
+
+             while (second)
+             {
+                 checkdup(first, second, form);
+                 second = second->nextb_k;
+             }
+             first = first->nextb_k;
+         }
+     }
+ }
+
+
+ /*
+  * Try to apply an augmenting list.
+  *
+  * Starting at queue entry tail-1, follow parent_q links back to the root
+  * (entry 0).  At each step the keys of the parent's b are removed from tabh,
+  * the parent's val_b is switched to the child's newval_q (or back to its
+  * oldval_q when rolling back), and the keys are entered into tabh again
+  * under their new hash values.
+  *
+  * If, while applying, a key lands on a slot of tabh that is already taken,
+  * the function calls itself with rollback set to undo the changes and
+  * returns FALSE.  Otherwise it returns TRUE.
+  *
+  * tabb and blen are not used directly; they are only forwarded to the
+  * recursive rollback call.
+  */
+ static int apply(
+ bstuff *tabb,
+ hstuff *tabh,
+ qstuff *tabq,
+ ub4 blen,
+ ub4 *scramble,
+ ub4 tail,
+ int rollback) /* FALSE applies augmenting path, TRUE rolls back */
+ {
+ ub4 hash;
+ key *mykey;
+ bstuff *pb;
+ ub4 child;
+ ub4 parent;
+ ub4 stabb; /* scramble[tab[b]] */
+
+ /* walk from child to parent */
+ for (child=tail-1; child; child=parent) /* ends once child is the root, entry 0 */
+ {
+ parent = tabq[child].parent_q; /* find child's parent */
+ pb = tabq[parent].b_q; /* find parent's list of siblings */
+
+ /* erase old hash values */
+ stabb = scramble[pb->val_b];
+ for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+ {
+ hash = mykey->a_k^stabb;
+ if (mykey == tabh[hash].key_h)
+ { /* erase hash for all of child's siblings */
+ tabh[hash].key_h = (key *)0;
+ }
+ }
+
+ /* change pb->val_b, which will change the hashes of all parent siblings */
+ pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
+
+ /* set new hash values */
+ stabb = scramble[pb->val_b];
+ for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+ {
+ hash = mykey->a_k^stabb;
+ if (rollback)
+ {
+ if (parent == 0) continue; /* root never had a hash */
+ }
+ else if (tabh[hash].key_h)
+ {
+ /* very rare: roll back any changes */
+ apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
+ return FALSE; /* failure, collision */
+ }
+ tabh[hash].key_h = mykey;
+ }
+ }
+ return TRUE;
+ }
+
+
+/*
+-------------------------------------------------------------------------------
+augment(): Add item to the mapping.
+
+Construct a spanning tree of *b*s with *item* as root, where each
+parent can have all its hashes changed (by some new val_b) with
+at most one collision, and each child is the b of that collision.
+
+I got this from Tarjan's "Data Structures and Network Algorithms". The
+path from *item* to a *b* that can be remapped with no collision is
+an "augmenting path". Change values of tab[b] along the path so that
+the unmapped key gets mapped and the unused hash value gets used.
+
+Assuming 1 key per b, if m out of n hash values are still unused,
+you should expect the transitive closure to cover n/m nodes before
+an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect
+this approach to take about nlogn time to map all single-key b's.
+-------------------------------------------------------------------------------
+*/
+static int augment(
+ bstuff *tabb, /* stuff indexed by b */
+ hstuff *tabh, /* which key is associated with which hash, indexed by hash */
+ qstuff *tabq, /* queue of *b* values, this is the spanning tree */
+ ub4 blen, /* length of tabb */
+ ub4 *scramble, /* final hash is a^scramble[tab[b]] */
+ ub4 smax, /* highest value in scramble */
+ bstuff *item, /* &tabb[b] for the b to be mapped */
+ ub4 nkeys, /* final hash must be in 0..nkeys-1 */
+ ub4 highwater, /* a value higher than any now in tabb[].water_b */
+ hashform *form) /* TRUE if we should do a minimal perfect hash */
+{
+ ub4 q; /* current position walking through the queue */
+ ub4 tail; /* tail of the queue. 0 is the head of the queue. */
+ ub4 limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1);
+ ub4 highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax);
+ int trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP);
+
+ /* initialize the root of the spanning tree */
+ tabq[0].b_q = item;
+ tail = 1;
+
+ /* construct the spanning tree by walking the queue, add children to tail */
+ for (q=0; q<tail; ++q)
+ {
+ bstuff *myb = tabq[q].b_q; /* the b for this node */
+ ub4 i; /* possible value for myb->val_b */
+
+ if (!trans && (q == 1))
+ break; /* don't do transitive closure */
+
+ for (i=0; i<limit; ++i)
+ {
+ bstuff *childb = (bstuff *)0; /* the b that this i maps to */
+ key *mykey; /* for walking through myb's keys */
+
+ for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
+ {
+ key *childkey;
+ ub4 hash = mykey->a_k^scramble[i];
+
+ if (hash >= highhash) break; /* out of bounds */
+ childkey = tabh[hash].key_h;
+
+ if (childkey)
+ {
+ bstuff *hitb = &tabb[childkey->b_k];
+
+ if (childb)
+ {
+ if (childb != hitb) break; /* hit at most one child b */
+ }
+ else
+ {
+ childb = hitb; /* remember this as childb */
+ if (childb->water_b == highwater) break; /* already explored */
+ }
+ }
+ }
+ if (mykey) continue; /* myb with i has multiple collisions */
+
+ /* add childb to the queue of reachable things */
+ if (childb) childb->water_b = highwater;
+ tabq[tail].b_q = childb;
+ tabq[tail].newval_q = (ub2)i; /* how to make parent (myb) use this hash */
+ tabq[tail].oldval_q = myb->val_b; /* need this for rollback */
+ tabq[tail].parent_q = q;
+ ++tail;
+
+ if (!childb)
+ { /* found an *i* with no collisions? */
+ /* try to apply the augmenting path */
+ if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
+ return TRUE; /* success, item was added to the perfect hash */
+
+ --tail; /* don't know how to handle such a child! */
+ }
+ }
+ }
+ return FALSE;
+}
+
+
+ /*
+  * Find a mapping that makes this a perfect hash.
+  *
+  * tabh and tabq are cleared, then every b that holds keys is handed to
+  * augment(), largest key groups first.  Returns TRUE once all groups are
+  * mapped; prints a diagnostic to stderr and returns FALSE at the first
+  * group augment() cannot place.
+  */
+ static int perfect(
+     bstuff *tabb,
+     hstuff *tabh,
+     qstuff *tabq,
+     ub4 blen,
+     ub4 smax,
+     ub4 *scramble,
+     ub4 nkeys,
+     hashform *form)
+ {
+     /* number of hash slots in use: nkeys when minimal, else smax */
+     const ub4 nslots = (form->perfect == MINIMAL_HP) ? nkeys : smax;
+     ub4 longest = 0;    /* maximum number of keys for any b */
+     ub4 b;
+     ub4 size;
+
+     /* clear any state from previous attempts */
+     memset((void *)tabh, 0, (size_t)(sizeof(hstuff)*nslots));
+     memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1)));
+
+     for (b = 0; b < blen; ++b)
+     {
+         if (tabb[b].listlen_b > longest)
+             longest = tabb[b].listlen_b;
+     }
+
+     /* In descending order by number of keys, map all *b*s */
+     for (size = longest; size > 0; --size)
+     {
+         for (b = 0; b < blen; ++b)
+         {
+             if (tabb[b].listlen_b != size)
+                 continue;
+             if (augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[b],
+                         nkeys, b+1, form))
+                 continue;
+
+             fprintf(stderr, "fail to map group of size %ld for tab size %ld\n", size, blen);
+             return FALSE;
+         }
+     }
+
+     /* Success! We found a perfect hash of all keys into 0..nkeys-1. */
+     return TRUE;
+ }
+
+
+/*
+ * Simple case: user gave (a,b). No more mixing, no guessing alen or blen.
+ * This assumes a,b reside in (key->a_k, key->b_k), and final->form == AB_HK.
+ */
+static void hash_ab(
+ bstuff **tabb, /* output, tab[] of the perfect hash, length *blen */
+ ub4 *alen, /* output, 0..alen-1 is range for a of (a,b) */
+ ub4 *blen, /* output, 0..blen-1 is range for b of (a,b) */
+ ub4 *salt, /* output, initializes initial hash */
+ gencode *final, /* code for final hash */
+ ub4 *scramble, /* input, hash = a^scramble[tab[b]] */
+ ub4 *smax, /* input, scramble[i] in 0..smax-1 */
+ key *keys, /* input, keys to hash */
+ ub4 nkeys, /* input, number of keys being hashed */
+ hashform *form) /* user directives */
+{
+ hstuff *tabh;
+ qstuff *tabq;
+ key *mykey;
+ ub4 i;
+ int used_tab;
+
+ /* initially make smax the first power of two bigger than nkeys */
+ *smax = ((ub4)1<<phash_log2(nkeys));
+ scrambleinit(scramble, *smax);
+
+ /* set *alen and *blen based on max A and B from user */
+ *alen = 1;
+ *blen = 1;
+ for (mykey = keys; mykey != (key *)0; mykey = mykey->next_k)
+ {
+ while (*alen <= mykey->a_k) *alen *= 2;
+ while (*blen <= mykey->b_k) *blen *= 2;
+ }
+ if (*alen > 2**smax)
+ {
+ fprintf(stderr,
+ "perfect.c: Can't deal with (A,B) having A bigger than twice \n");
+ fprintf(stderr,
+ " the smallest power of two greater or equal to any legal hash.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* allocate working memory */
+ *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen)));
+ tabq = (qstuff *)yasm_xmalloc(sizeof(qstuff)*(*blen+1));
+ tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
+ nkeys : *smax));
+
+ /* check that (a,b) are distinct and put them in tabb indexed by b */
+ (void)inittab(*tabb, *blen, keys, form, FALSE);
+
+ /* try with smax */
+ if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+ {
+ if (form->perfect == MINIMAL_HP)
+ {
+ fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+ exit(EXIT_FAILURE);
+ }
+ else
+ {
+ /* try with 2*smax */
+ free((void *)tabh);
+ *smax = *smax * 2;
+ scrambleinit(scramble, *smax);
+ tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
+ nkeys : *smax));
+ if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+ {
+ fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ /* check if tab[] was really needed */
+ for (i=0; i<*blen; ++i)
+ {
+ if ((*tabb)[i].val_b != 0) break; /* assumes permute(0) == 0 */
+ }
+ used_tab = (i < *blen);
+
+ /* write the code for the perfect hash */
+ *salt = 1;
+ final->used = 1;
+ if (!used_tab)
+ {
+ sprintf(final->line[0], " unsigned long rsl = a;\n");
+ }
+ else if (*blen < USE_SCRAMBLE)
+ {
+ sprintf(final->line[0], " unsigned long rsl = (a ^ tab[b]);\n");
+ }
+ else
+ {
+ sprintf(final->line[0], " unsigned long rsl = (a ^ scramble[tab[b]]);\n");
+ }
+
+ free((void *)tabq);
+ free((void *)tabh);
+}
+
+
+ /*
+  * Guess initial values for alen and blen.
+  *
+  * *smax is read and, for NORMAL_HP with FAST_HS and a crowded table, may be
+  * doubled.  *alen and *blen are pure outputs: they are never read before
+  * being assigned here.
+  */
+ static void initalen(
+     ub4 *alen,          /* output, initial alen */
+     ub4 *blen,          /* output, initial blen */
+     ub4 *smax,  /* input, power of two greater or equal to max hash value */
+     ub4 nkeys,          /* number of keys being hashed */
+     hashform *form)     /* user directives */
+ {
+     /*
+      * Find initial *alen, *blen
+      * Initial alen and blen values were found empirically. Some factors:
+      *
+      * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
+      *
+      * alen and blen must be powers of 2 because the values in 0..alen-1 and
+      * 0..blen-1 are produced by applying a bitmask to the initial hash function.
+      *
+      * alen must be less than smax, in fact less than nkeys, because otherwise
+      * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
+      * all the *a*s associated with a given *b*, so there would be no legal
+      * value to assign to tab[b]. This only matters when we're doing a minimal
+      * perfect hash.
+      *
+      * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
+      * and alen*blen = smax*smax/32.
+      *
+      * Values of blen less than smax/4 never work, and smax/2 always works.
+      *
+      * We want blen as small as possible because it is the number of bytes in
+      * the huge array we must create for the perfect hash.
+      *
+      * When nkey <= smax*(5/8), blen=smax/4 works much more often with
+      * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4
+      * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it
+      * has something to do with 5/8 = 1/8 * 5. For example examine 80000,
+      * 85000, and 90000 keys with different values of alen. This only matters
+      * if we're doing a minimal perfect hash.
+      *
+      * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
+      * Bigger than that it must produce two integers, which increases the
+      * cost of the hash per character hashed.
+      */
+     if (form->perfect == NORMAL_HP)
+     {
+         if ((form->speed == FAST_HS) && (nkeys > *smax*0.8))
+         {
+             *smax = *smax * 2;
+         }
+
+         /*
+          * Settle blen first: for large INT_HT tables alen is derived from
+          * blen, so blen must hold its final value before alen is computed.
+          */
+         if ((form->hashtype == INT_HT) && *smax < 32)
+             *blen = *smax;  /* go for function speed not space */
+         else if (*smax/4 <= (1<<14))
+             *blen = ((nkeys <= *smax*0.56) ? *smax/32 :
+                      (nkeys <= *smax*0.74) ? *smax/16 : *smax/8);
+         else
+             *blen = ((nkeys <= *smax*0.6) ? *smax/16 :
+                      (nkeys <= *smax*0.8) ? *smax/8 : *smax/4);
+
+         if ((form->speed == FAST_HS) && (*blen < *smax/8))
+             *blen = *smax/8;
+
+         if (*blen < 1) *blen = 1;
+
+         *alen = ((form->hashtype==INT_HT) && *smax>131072) ?
+             ((ub4)1<<(UB4BITS-phash_log2(*blen))) : /* distinct keys => distinct (A,B) */
+             *smax;          /* no reason to restrict alen to smax/2 */
+
+         if (*alen < 1) *alen = 1;
+     }
+     else
+     {
+         switch(phash_log2(*smax))
+         {
+         case 0:
+             *alen = 1;
+             *blen = 1;
+             break;  /* do not fall into the next case, which would zero both */
+         case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
+             *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2;
+             *blen = *smax/2;
+             break;
+         case 9:
+         case 10:
+         case 11:
+         case 12:
+         case 13:
+         case 14:
+         case 15:
+         case 16:
+         case 17:
+             if (form->speed == FAST_HS)
+             {
+                 *alen = *smax/2;
+                 *blen = *smax/4;
+             }
+             else if (*smax/4 < USE_SCRAMBLE)
+             {
+                 *alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+                 *blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+             }
+             else
+             {
+                 *alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 :
+                          (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2);
+                 *blen = *smax/4;    /* always give the small size a shot */
+             }
+             break;
+         case 18:
+             if (form->speed == FAST_HS)
+             {
+                 *alen = *smax/2;
+                 *blen = *smax/2;
+             }
+             else
+             {
+                 *alen = *smax/8;    /* never require the multiword hash */
+                 *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+             }
+             break;
+         case 19:
+         case 20:
+             *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2;
+             *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+             break;
+         default:
+             *alen = *smax/2;    /* just find a hash as quick as possible */
+             *blen = *smax/2;    /* we'll be thrashing virtual memory at this size */
+             break;
+         }
+     }
+ }
+
+/*
+** Try to find a perfect hash function.
+** Return the successful initializer for the initial hash.
+** Return 0 if no perfect hash could be found.
+*/
+void findhash(
+ bstuff **tabb, /* output, tab[] of the perfect hash, length *blen */
+ hstuff **tabh, /* output, table of keys indexed by hash value */
+ ub4 *alen, /* output, 0..alen-1 is range for a of (a,b) */
+ ub4 *blen, /* output, 0..blen-1 is range for b of (a,b) */
+ ub4 *salt, /* output, initializes initial hash */
+ gencode *final, /* code for final hash */
+ ub4 *scramble, /* input, hash = a^scramble[tab[b]] */
+ ub4 *smax, /* input, scramble[i] in 0..smax-1 */
+ key *keys, /* input, keys to hash */
+ ub4 nkeys, /* input, number of keys being hashed */
+ hashform *form) /* user directives */
+{
+ ub4 bad_initkey; /* how many times did initkey fail? */
+ ub4 bad_perfect; /* how many times did perfect fail? */
+ ub4 trysalt; /* trial initializer for initial hash */
+ ub4 maxalen;
+ qstuff *tabq; /* table of stuff indexed by queue value, used by augment */
+
+ /* The case of (A,B) supplied by the user is a special case */
+ if (form->hashtype == AB_HT)
+ {
+ hash_ab(tabb, alen, blen, salt, final,
+ scramble, smax, keys, nkeys, form);
+ return;
+ }
+
+ /* guess initial values for smax, alen and blen */
+ *smax = ((ub4)1<<phash_log2(nkeys));
+ initalen(alen, blen, smax, nkeys, form);
+
+ scrambleinit(scramble, *smax);
+
+ maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax;
+
+ /* allocate working memory */
+ *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen)));
+ tabq = (qstuff *)yasm_xmalloc(sizeof(qstuff)*(*blen+1));
+ *tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
+ nkeys : *smax));
+
+ /* Actually find the perfect hash */
+ *salt = 0;
+ bad_initkey = 0;
+ bad_perfect = 0;
+ for (trysalt=1; ; ++trysalt)
+ {
+ ub4 rslinit;
+ /* Try to find distinct (A,B) for all keys */
+
+ rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt,
+ form, final);
+
+ if (rslinit == 2)
+ { /* initkey actually found a perfect hash, not just distinct (a,b) */
+ *salt = 1;
+ *blen = 0;
+ break;
+ }
+ else if (rslinit == 0)
+ {
+ /* didn't find distinct (a,b) */
+ if (++bad_initkey >= RETRY_INITKEY)
+ {
+ /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
+ if (*alen < maxalen)
+ {
+ *alen *= 2;
+ }
+ else if (*blen < *smax)
+ {
+ *blen *= 2;
+ free(tabq);
+ free(*tabb);
+ *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen)));
+ tabq = (qstuff *)yasm_xmalloc((size_t)(sizeof(qstuff)*(*blen+1)));
+ }
+ else
+ {
+ duplicates(*tabb, *blen, keys, form); /* check for duplicates */
+ fprintf(stderr, "fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
+ exit(EXIT_FAILURE);
+ }
+ bad_initkey = 0;
+ bad_perfect = 0;
+ }
+ continue; /* two keys have same (a,b) pair */
+ }
+
+ /* Given distinct (A,B) for all keys, build a perfect hash */
+ if (!perfect(*tabb, *tabh, tabq, *blen, *smax, scramble, nkeys, form))
+ {
+ if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) ||
+ (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX))
+ {
+ if (*blen < *smax)
+ {
+ *blen *= 2;
+ free(*tabb);
+ free(tabq);
+ *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen)));
+ tabq = (qstuff *)yasm_xmalloc((size_t)(sizeof(qstuff)*(*blen+1)));
+ --trysalt; /* we know this salt got distinct (A,B) */
+ }
+ else
+ {
+ fprintf(stderr, "fatal error: Cannot perfect hash: cannot build tab[]\n");
+ exit(EXIT_FAILURE);
+ }
+ bad_perfect = 0;
+ }
+ continue;
+ }
+
+ *salt = trysalt;
+ break;
+ }
+
+ /* free working memory */
+ free((void *)tabq);
+}
+
+#if 0
+/*
+------------------------------------------------------------------------------
+Input/output type routines
+------------------------------------------------------------------------------
+*/
+
+ /*
+  * Get the list of keys.
+  *
+  * Reads one key per line from stdin.  Depending on form->mode a line is
+  * parsed as a hex or decimal (a,b) pair, as a single hex or decimal hash,
+  * or kept as the key text itself.  Each key is pushed onto the front of
+  * *keys and counted in *nkeys.
+  */
+ static void getkeys(keys, nkeys, textroot, keyroot, form)
+ key **keys; /* list of all keys */
+ ub4 *nkeys; /* number of keys */
+ reroot *textroot; /* get space to store key text */
+ reroot *keyroot; /* get space for keys */
+ hashform *form; /* user directives */
+ {
+ key *mykey;
+ char *mytext;
+ mytext = (char *)renew(textroot);
+ *keys = 0;
+ *nkeys = 0;
+ while (fgets(mytext, MAXKEYLEN, stdin))
+ {
+ mykey = (key *)renew(keyroot);
+ if (form->mode == AB_HM)
+ {
+ sscanf(mytext, "%lx %lx ", &mykey->a_k, &mykey->b_k);
+ }
+ else if (form->mode == ABDEC_HM)
+ {
+ sscanf(mytext, "%ld %ld ", &mykey->a_k, &mykey->b_k);
+ }
+ else if (form->mode == HEX_HM)
+ {
+ sscanf(mytext, "%lx ", &mykey->hash_k);
+ }
+ else if (form->mode == DECIMAL_HM)
+ {
+ sscanf(mytext, "%ld ", &mykey->hash_k);
+ }
+ else
+ {
+ mykey->name_k = (ub1 *)mytext;
+ mytext = (char *)renew(textroot); /* the key keeps its buffer; take a new one for the next line */
+ mykey->len_k = (ub4)(strlen((char *)mykey->name_k)-1); /* length minus one; presumably drops the trailing newline kept by fgets */
+ }
+ mykey->next_k = *keys;
+ *keys = mykey;
+ ++*nkeys;
+ }
+ redel(textroot, mytext);
+ }
+
+ /*
+  * Make the .c file.
+  *
+  * Writes "phash.c": the scramble[] table when blen >= USE_SCRAMBLE, the
+  * tab[] table when blen > 0, and a phash() function whose body is the
+  * lines collected in final followed by "return rsl;".
+  */
+ static void make_c(tab, smax, blen, scramble, final, form)
+ bstuff *tab; /* table indexed by b */
+ ub4 smax; /* range of scramble[] */
+ ub4 blen; /* b in 0..blen-1, power of 2 */
+ ub4 *scramble; /* used in final hash */
+ gencode *final; /* code for the final hash */
+ hashform *form; /* user directives */
+ {
+ ub4 i;
+ FILE *f;
+ f = fopen("phash.c", "w"); /* NOTE(review): the result is not checked before use */
+ fprintf(f, "/* table for the mapping for the perfect hash */\n");
+ fprintf(f, "#include \"lookupa.h\"\n");
+ fprintf(f, "\n");
+ if (blen >= USE_SCRAMBLE)
+ {
+ fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n");
+ if (smax > UB2MAXVAL+1)
+ {
+ fprintf(f, "unsigned long scramble[] = {\n");
+ for (i=0; i<=UB1MAXVAL; i+=4)
+ fprintf(f, "0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
+ scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
+ }
+ else
+ {
+ fprintf(f, "unsigned short scramble[] = {\n");
+ for (i=0; i<=UB1MAXVAL; i+=8)
+ fprintf(f,
+"0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
+ scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
+ scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
+ }
+ fprintf(f, "};\n");
+ fprintf(f, "\n");
+ }
+ if (blen > 0)
+ {
+ fprintf(f, "/* small adjustments to _a_ to make values distinct */\n");
+
+ if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+ fprintf(f, "unsigned char tab[] = {\n");
+ else
+ fprintf(f, "unsigned short tab[] = {\n");
+
+ if (blen < 16)
+ {
+ for (i=0; i<blen; ++i) fprintf(f, "%3d,", scramble[tab[i].val_b]); /* NOTE(review): %3d is given a ub4 argument */
+ }
+ else if (blen <= 1024)
+ {
+ for (i=0; i<blen; i+=16)
+ fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
+ scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
+ scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
+ scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
+ scramble[tab[i+6].val_b], scramble[tab[i+7].val_b],
+ scramble[tab[i+8].val_b], scramble[tab[i+9].val_b],
+ scramble[tab[i+10].val_b], scramble[tab[i+11].val_b],
+ scramble[tab[i+12].val_b], scramble[tab[i+13].val_b],
+ scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]);
+ }
+ else if (blen < USE_SCRAMBLE)
+ {
+ for (i=0; i<blen; i+=8)
+ fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
+ scramble[tab[i+0].val_b], scramble[tab[i+1].val_b],
+ scramble[tab[i+2].val_b], scramble[tab[i+3].val_b],
+ scramble[tab[i+4].val_b], scramble[tab[i+5].val_b],
+ scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]);
+ }
+ else
+ {
+ for (i=0; i<blen; i+=16)
+ fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
+ tab[i+0].val_b, tab[i+1].val_b,
+ tab[i+2].val_b, tab[i+3].val_b,
+ tab[i+4].val_b, tab[i+5].val_b,
+ tab[i+6].val_b, tab[i+7].val_b,
+ tab[i+8].val_b, tab[i+9].val_b,
+ tab[i+10].val_b, tab[i+11].val_b,
+ tab[i+12].val_b, tab[i+13].val_b,
+ tab[i+14].val_b, tab[i+15].val_b);
+ }
+ fprintf(f, "};\n");
+ fprintf(f, "\n");
+ }
+ fprintf(f, "/* The hash function */\n");
+ switch(form->mode)
+ {
+ case NORMAL_HM:
+ fprintf(f, "ub4 phash(key, len)\n");
+ fprintf(f, "char *key;\n");
+ fprintf(f, "int len;\n");
+ break;
+ case INLINE_HM:
+ case HEX_HM:
+ case DECIMAL_HM:
+ fprintf(f, "ub4 phash(val)\n");
+ fprintf(f, "ub4 val;\n");
+ break;
+ case AB_HM:
+ case ABDEC_HM:
+ fprintf(f, "ub4 phash(a,b)\n");
+ fprintf(f, "ub4 a;\n");
+ fprintf(f, "ub4 b;\n");
+ break;
+ }
+ fprintf(f, "{\n");
+ for (i=0; i<final->used; ++i)
+ fprintf(f, final->line[i]); /* NOTE(review): generated text is passed as the format string */
+ fprintf(f, " return rsl;\n");
+ fprintf(f, "}\n");
+ fprintf(f, "\n");
+ fclose(f);
+ }
+
+/*
+------------------------------------------------------------------------------
+Read in the keys, find the hash, and write the .c and .h files
+------------------------------------------------------------------------------
+*/
+static void driver(form)
+hashform *form; /* user directives */
+{
+ ub4 nkeys; /* number of keys */
+ key *keys; /* head of list of keys */
+ bstuff *tab; /* table indexed by b */
+ ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */
+ ub4 alen; /* a in 0..alen-1, a power of 2 */
+ ub4 blen; /* b in 0..blen-1, a power of 2 */
+ ub4 salt; /* a parameter to the hash function */
+ reroot *textroot; /* MAXKEYLEN-character text lines */
+ reroot *keyroot; /* source of keys */
+ gencode final; /* code for final hash */
+ ub4 i;
+ ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
+ char buf[10][80]; /* buffer for generated code */
+ char *buf2[10]; /* also for generated code */
+
+ /* set up memory sources */
+ textroot = remkroot((size_t)MAXKEYLEN);
+ keyroot = remkroot(sizeof(key));
+
+ /* set up code for final hash */
+ final.line = buf2;
+ final.used = 0;
+ final.len = 10;
+ for (i=0; i<10; ++i) final.line[i] = buf[i];
+
+ /* read in the list of keywords */
+ getkeys(&keys, &nkeys, textroot, keyroot, form);
+
+ /* find the hash */
+ findhash(&tab, &alen, &blen, &salt, &final,
+ scramble, &smax, keys, nkeys, form);
+
+ /* generate the phash.c file */
+ make_c(tab, smax, blen, scramble, &final, form);
+
+ /* clean up memory sources */
+ refree(textroot);
+ refree(keyroot);
+ free((void *)tab);
+}
+
+
+ /* Interpret arguments and call the driver */
+ /* See usage_error for the expected arguments */
+ /* As written here no argument is actually examined: the defaults below
+ are used unchanged and driver() is called with them. */
+ int main(argc, argv)
+ int argc;
+ char **argv;
+ {
+ int mode_given = FALSE; /* not referenced again in this function */
+ int minimal_given = FALSE; /* not referenced again in this function */
+ int speed_given = FALSE; /* not referenced again in this function */
+ hashform form;
+ char *c; /* not referenced in this function */
+
+ /* default behavior */
+ form.mode = NORMAL_HM;
+ form.hashtype = STRING_HT;
+ form.perfect = MINIMAL_HP;
+ form.speed = SLOW_HS;
+
+ /* Generate the [minimal] perfect hash */
+ driver(&form);
+
+ return EXIT_SUCCESS;
+ }
+#endif
diff --git a/tools/genperf/perfect.h b/tools/genperf/perfect.h
new file mode 100644
index 0000000..b78d943
--- /dev/null
+++ b/tools/genperf/perfect.h
@@ -0,0 +1,132 @@
+/*
+------------------------------------------------------------------------------
+perfect.h: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996
+You may use this code in any way you wish, and it is free. No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef PERFECT
+#define PERFECT
+
+#define MAXKEYLEN 30 /* maximum length of a key */
+#define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */
+#define SCRAMBLE_LEN ((ub4)1<<16) /* length of *scramble* */
+#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
+#define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */
+#define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */
+
+/* the generated code for the final hash, assumes initial hash is done */
+struct gencode
+{
+ char **line; /* array of text lines, 80 bytes apiece */
+ /*
+ * The code placed here must declare "ub4 rsl"
+ * and assign it the value of the perfect hash using the function inputs.
+ * Later code will be tacked on which returns rsl or manipulates it according
+ * to the user directives.
+ *
+ * This code is at the top of the routine; it may and must declare any
+ * local variables it needs.
+ *
+ * Each way of filling in **line should be given a comment that is a unique
+ * tag. A testcase named with that tag should also be found which tests
+ * the generated code.
+ */
+ ub4 len; /* number of lines available for final hash */
+ ub4 used; /* number of lines used by final hash */
+
+ ub4 lowbit; /* for HEX, lowest interesting bit */
+ ub4 highbit; /* for HEX, highest interesting bit */
+ ub4 diffbits; /* bits which differ for some key */
+ ub4 i,j,k,l,m,n,o; /* state machine used in hexn() */
+};
+typedef struct gencode gencode;
+
+/* user directives: perfect hash? minimal perfect hash? input is an int? */
+struct hashform
+{
+ enum {
+ NORMAL_HM, /* key is a string */
+ INLINE_HM, /* user will do initial hash, we must choose salt for them */
+ HEX_HM, /* key to be hashed is a hexadecimal 4-byte integer */
+ DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */
+ AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */
+ ABDEC_HM /* like AB_HM, but in decimal */
+ } mode;
+ enum {
+ STRING_HT, /* key is a string */
+ INT_HT, /* key is an integer */
+ AB_HT /* dunno what key is, but input is distinct (A,B) pair */
+ } hashtype;
+ enum {
+ NORMAL_HP, /* just find a perfect hash */
+ MINIMAL_HP /* find a minimal perfect hash */
+ } perfect;
+ enum {
+ FAST_HS, /* fast mode */
+ SLOW_HS /* slow mode */
+ } speed;
+};
+typedef struct hashform hashform;
+
+/* representation of a key */
+struct key
+{
+ char *name_k; /* the actual key */
+ ub4 len_k; /* the length of the actual key */
+ ub4 hash_k; /* the initial hash value for this key */
+ struct key *next_k; /* next key */
+/* beyond this point is mapping-dependent */
+ ub4 a_k; /* a, of the key maps to (a,b) */
+ ub4 b_k; /* b, of the key maps to (a,b) */
+ struct key *nextb_k; /* next key with this b */
+};
+typedef struct key key;
+
+/* things indexed by b of original (a,b) pair */
+struct bstuff
+{
+ ub2 val_b; /* hash=a^tabb[b].val_b */
+ key *list_b; /* tabb[i].list_b is list of keys with b==i */
+ ub4 listlen_b; /* length of list_b */
+ ub4 water_b; /* high watermark of who has visited this map node */
+};
+typedef struct bstuff bstuff;
+
+/* things indexed by final hash value */
+struct hstuff
+{
+ key *key_h; /* tabh[i].key_h is the key with a hash of i */
+};
+typedef struct hstuff hstuff;
+
+/* things indexed by queue position */
+struct qstuff
+{
+ bstuff *b_q; /* b that currently occupies this hash */
+ ub4 parent_q; /* queue position of parent that could use this hash */
+ ub2 newval_q; /* what to change parent tab[b] to to use this hash */
+ ub2 oldval_q; /* original value of tab[b] */
+};
+typedef struct qstuff qstuff;
+
+/* return ceiling(log based 2 of x) */
+ub4 phash_log2(ub4 x);
+
+/* Given the keys, scramble[], and hash mode, find the perfect hash */
+void findhash(bstuff **tabb, hstuff **tabh, ub4 *alen, ub4 *blen, ub4 *salt,
+ gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys,
+ hashform *form);
+
+/* private, but in a different file because it's excessively verbose */
+int inithex(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub4 salt,
+ gencode *final, hashform *form);
+
+#endif /* PERFECT */
diff --git a/tools/genperf/standard.h b/tools/genperf/standard.h
new file mode 100644
index 0000000..596b893
--- /dev/null
+++ b/tools/genperf/standard.h
@@ -0,0 +1,35 @@
+/*
+------------------------------------------------------------------------------
+Standard definitions and types, Bob Jenkins
+------------------------------------------------------------------------------
+*/
+#ifndef STANDARD
+#define STANDARD
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+typedef unsigned long int ub4; /* intended as unsigned 4-byte quantities */
+/* NOTE(review): unsigned long is wider than 4 bytes on LP64 platforms while
+ * UB4BITS stays 32 -- confirm no caller relies on wraparound at 32 bits */
+#define UB4BITS 32
+typedef unsigned short int ub2;
+#define UB2MAXVAL 0xffff
+typedef unsigned char ub1;
+#define UB1MAXVAL 0xff
+typedef int word; /* fastest type available */
+
+#define bis(target,mask) ((target) |= (mask))
+#define bic(target,mask) ((target) &= ~(mask))
+#define bit(target,mask) ((target) & (mask))
+#ifndef align
+/* Round a up to the next multiple of sizeof(void *).  The argument is
+ * parenthesized so that the cast applies to the whole expression passed in,
+ * not only to its first operand. */
+# define align(a) (((ub4)(a)+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
+#endif /* align */
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#endif /* STANDARD */
diff --git a/tools/python-yasm/Makefile.inc b/tools/python-yasm/Makefile.inc
new file mode 100644
index 0000000..fce135b
--- /dev/null
+++ b/tools/python-yasm/Makefile.inc
@@ -0,0 +1,79 @@
+PYBINDING_DEPS = tools/python-yasm/bytecode.pxi
+PYBINDING_DEPS += tools/python-yasm/errwarn.pxi
+PYBINDING_DEPS += tools/python-yasm/expr.pxi
+PYBINDING_DEPS += tools/python-yasm/floatnum.pxi
+PYBINDING_DEPS += tools/python-yasm/intnum.pxi
+PYBINDING_DEPS += tools/python-yasm/symrec.pxi
+PYBINDING_DEPS += tools/python-yasm/value.pxi
+
+EXTRA_DIST += tools/python-yasm/pyxelator/cparse.py
+EXTRA_DIST += tools/python-yasm/pyxelator/genpyx.py
+EXTRA_DIST += tools/python-yasm/pyxelator/ir.py
+EXTRA_DIST += tools/python-yasm/pyxelator/lexer.py
+EXTRA_DIST += tools/python-yasm/pyxelator/node.py
+EXTRA_DIST += tools/python-yasm/pyxelator/parse_core.py
+EXTRA_DIST += tools/python-yasm/pyxelator/work_unit.py
+EXTRA_DIST += tools/python-yasm/pyxelator/wrap_yasm.py
+EXTRA_DIST += tools/python-yasm/setup.py
+EXTRA_DIST += tools/python-yasm/yasm.pyx
+EXTRA_DIST += $(PYBINDING_DEPS)
+
+if HAVE_PYTHON_BINDINGS
+
+# Use Pyxelator to generate Pyrex function headers.
+_yasm.pxi: ${HEADERS}
+ @rm -rf .tmp
+ @mkdir .tmp
+ $(PYTHON) $(srcdir)/tools/python-yasm/pyxelator/wrap_yasm.py \
+ "YASM_DIR=${srcdir}" "CPP=${CPP}" "CPPFLAGS=${CPPFLAGS}"
+ @rm -rf .tmp
+
+CLEANFILES += _yasm.pxi
+
+# Need to build a local copy of the main Pyrex input file to include _yasm.pxi
+# from the build directory. Also need to fixup the other .pxi include paths.
+yasm.pyx: $(srcdir)/tools/python-yasm/yasm.pyx
+ sed -e 's,^include "\([^_]\),include "${srcdir}/tools/python-yasm/\1,' \
+ $(srcdir)/tools/python-yasm/yasm.pyx > $@
+
+CLEANFILES += yasm.pyx
+
+# Actually run Cython
+yasm_python.c: yasm.pyx _yasm.pxi $(PYBINDING_DEPS)
+ $(PYTHON) -c "from Cython.Compiler.Main import main; main(command_line=1)" \
+ -o $@ yasm.pyx
+
+CLEANFILES += yasm_python.c
+
+# Now the Python build magic...
+python-setup.txt: Makefile
+ echo "includes=${DEFS} ${DEFAULT_INCLUDES} ${INCLUDES} ${AM_CPPFLAGS} ${CPPFLAGS}" > python-setup.txt
+ echo "sources=${libyasm_a_SOURCES} ${nodist_libyasm_a_SOURCES}" >> python-setup.txt
+ echo "srcdir=${srcdir}" >> python-setup.txt
+ echo "gcc=${GCC}" >> python-setup.txt
+
+CLEANFILES += python-setup.txt
+
+.python-build: python-setup.txt yasm_python.c ${libyasm_a_SOURCES} ${nodist_libyasm_a_SOURCES}
+ $(PYTHON) `test -f tools/python-yasm/setup.py || echo '$(srcdir)/'`tools/python-yasm/setup.py build
+ touch .python-build
+python-build: .python-build
+
+CLEANFILES += .python-build
+
+python-install: .python-build
+ $(PYTHON) `test -f tools/python-yasm/setup.py || echo '$(srcdir)/'`tools/python-yasm/setup.py install "--install-lib=$(DESTDIR)$(pythondir)"
+
+python-uninstall:
+ rm -f `$(PYTHON) -c "import sys;sys.path.insert(0, '${DESTDIR}${pythondir}'); import yasm; print yasm.__file__"`
+
+else
+
+python-build:
+python-install:
+python-uninstall:
+
+endif
+
+EXTRA_DIST += tools/python-yasm/tests/Makefile.inc
+include tools/python-yasm/tests/Makefile.inc
diff --git a/tools/python-yasm/bytecode.pxi b/tools/python-yasm/bytecode.pxi
new file mode 100644
index 0000000..34aeaa5
--- /dev/null
+++ b/tools/python-yasm/bytecode.pxi
@@ -0,0 +1,107 @@
+# Python bindings for Yasm: Pyrex input file for bytecode.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+cdef class Bytecode:
+    # Wrapper object holding a yasm_bytecode pointer.
+    cdef yasm_bytecode *bc
+
+    cdef object __weakref__ # make weak-referenceable
+
+    def __cinit__(self, bc):
+        # Only a PyCObject is accepted; its pointer is extracted with
+        # __get_voidp.  Any other argument raises NotImplementedError and
+        # leaves self.bc set to NULL.
+        self.bc = NULL
+        if PyCObject_Check(bc):
+            self.bc = <yasm_bytecode *>__get_voidp(bc, Bytecode)
+        else:
+            raise NotImplementedError
+
+    def __dealloc__(self):
+        # self.bc is still NULL when __cinit__ raised before assigning it,
+        # so it must be checked before being dereferenced.
+        # Only free if we're not part of a section; if we're part of a section
+        # the section takes care of freeing the bytecodes.
+        if self.bc != NULL and self.bc.section == NULL:
+            yasm_bc_destroy(self.bc)
+
+    # Each property below reads or writes the same-named field of the
+    # wrapped yasm_bytecode.
+    property len:
+        def __get__(self): return self.bc.len
+        def __set__(self, value): self.bc.len = value
+    property mult_int:
+        def __get__(self): return self.bc.mult_int
+        def __set__(self, value): self.bc.mult_int = value
+    property line:
+        def __get__(self): return self.bc.line
+        def __set__(self, value): self.bc.line = value
+    property offset:
+        def __get__(self): return self.bc.offset
+        def __set__(self, value): self.bc.offset = value
+    property bc_index:
+        def __get__(self): return self.bc.bc_index
+        def __set__(self, value): self.bc.bc_index = value
+    property symbols:
+        # Someday extend this to do something modifiable, e.g. return a
+        # list-like object.
+        def __get__(self):
+            # Returns a new list with one wrapper (from __make_symbol) per
+            # entry of the NULL-terminated symrecs array; an empty list
+            # when symrecs itself is NULL.
+            cdef yasm_symrec *sym
+            cdef int i
+            if self.bc.symrecs == NULL:
+                return []
+            s = []
+            i = 0
+            sym = self.bc.symrecs[i]
+            while sym != NULL:
+                s.append(__make_symbol(sym))
+                i = i+1
+                sym = self.bc.symrecs[i]
+            return s
+
+#
+# Keep Bytecode reference paired with bc using weak references.
+# This is broken in Pyrex 0.9.4.1; Pyrex 0.9.5 has a working version.
+#
+
+from weakref import WeakValueDictionary as __weakvaldict
+__bytecode_map = __weakvaldict()
+#__bytecode_map = {}
+
+cdef object __make_bytecode(yasm_bytecode *bc):
+ __error_check()
+ vptr = PyCObject_FromVoidPtr(bc, NULL)
+ data = __bytecode_map.get(vptr, None)
+ if data:
+ return data
+ bcobj = Bytecode(__pass_voidp(bc, Bytecode))
+ __bytecode_map[vptr] = bcobj
+ return bcobj
+
+# Org bytecode
+def __org__new__(cls, start, value=0, line=0):
+ cdef yasm_bytecode *bc
+ bc = yasm_bc_create_org(start, line, value)
+ obj = Bytecode.__new__(cls, __pass_voidp(bc, Bytecode))
+ __bytecode_map[PyCObject_FromVoidPtr(bc, NULL)] = obj
+ return obj
+__org__new__ = staticmethod(__org__new__)
+class Org(Bytecode):
+ __new__ = __org__new__
+
+
+#cdef class Section:
diff --git a/tools/python-yasm/errwarn.pxi b/tools/python-yasm/errwarn.pxi
new file mode 100644
index 0000000..9568cc0
--- /dev/null
+++ b/tools/python-yasm/errwarn.pxi
@@ -0,0 +1,73 @@
+# Python bindings for Yasm: Pyrex input file for errwarn.h
+#
+# Copyright (C) 2006 Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+class YasmError(Exception): pass
+
+cdef int __error_check() except 1:
+    # Turn a pending yasm error, if any, into a Python exception.
+    # Returns 0 when yasm_error_occurred() reports nothing; otherwise sets
+    # the Python error indicator and returns 1, which the "except 1"
+    # declaration propagates to the caller as an exception.
+    cdef yasm_error_class errclass
+    cdef unsigned long xrefline
+    cdef char *errstr, *xrefstr
+
+    # short path for the common case
+    if not <int>yasm_error_occurred():
+        return 0
+
+    # look up our preferred python error, fall back to YasmError
+    # Order matters here. Go from most to least specific within a class
+    if yasm_error_matches(YASM_ERROR_ZERO_DIVISION):
+        exception = ZeroDivisionError
+    # Enable these once there are tests that need them.
+    #elif yasm_error_matches(YASM_ERROR_OVERFLOW):
+    #    exception = OverflowError
+    #elif yasm_error_matches(YASM_ERROR_FLOATING_POINT):
+    #    exception = FloatingPointError
+    #elif yasm_error_matches(YASM_ERROR_ARITHMETIC):
+    #    exception = ArithmeticError
+    #elif yasm_error_matches(YASM_ERROR_ASSERTION):
+    #    exception = AssertionError
+    #elif yasm_error_matches(YASM_ERROR_VALUE):
+    #    exception = ValueError # include notabs, notconst, toocomplex
+    #elif yasm_error_matches(YASM_ERROR_IO):
+    #    exception = IOError
+    #elif yasm_error_matches(YASM_ERROR_NOT_IMPLEMENTED):
+    #    exception = NotImplementedError
+    #elif yasm_error_matches(YASM_ERROR_TYPE):
+    #    exception = TypeError
+    #elif yasm_error_matches(YASM_ERROR_SYNTAX):
+    #    exception = SyntaxError #include parse
+    else:
+        exception = YasmError
+
+    # retrieve info (clears error)
+    yasm_error_fetch(&errclass, &errstr, &xrefline, &xrefstr)
+
+    if xrefline and xrefstr:
+        # xrefline is an unsigned long, so format it with %ld (through an
+        # explicit cast) rather than %d, which reads an int from the
+        # variadic argument list.
+        PyErr_Format(exception, "%s: %ld: %s", errstr, <long>xrefline, xrefstr)
+    else:
+        PyErr_SetString(exception, errstr)
+
+    # the strings handed back by yasm_error_fetch are released here
+    if xrefstr: free(xrefstr)
+    free(errstr)
+    return 1
diff --git a/tools/python-yasm/expr.pxi b/tools/python-yasm/expr.pxi
new file mode 100644
index 0000000..995a46a
--- /dev/null
+++ b/tools/python-yasm/expr.pxi
@@ -0,0 +1,136 @@
+# Python bindings for Yasm: Pyrex input file for expr.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+cdef extern from *:
+ # Defined as a macro, so not automatically brought in by pyxelator
+ cdef yasm_expr *yasm_expr_simplify(yasm_expr *e, int calc_bc_dist)
+
+import operator
+__op = {}
+for ops, operation in [
+ ((operator.__add__, operator.add, '+'), YASM_EXPR_ADD),
+ ((operator.__and__, operator.and_, '&'), YASM_EXPR_AND),
+ ((operator.__div__, operator.div, '/'), YASM_EXPR_SIGNDIV),
+ ((operator.__floordiv__, operator.floordiv, '//'), YASM_EXPR_SIGNDIV),
+ ((operator.__ge__, operator.ge, '>='), YASM_EXPR_GE),
+ ((operator.__gt__, operator.gt, '>'), YASM_EXPR_GT),
+ ((operator.__inv__, operator.inv, '~'), YASM_EXPR_NOT),
+ ((operator.__invert__, operator.invert), YASM_EXPR_NOT),
+ ((operator.__le__, operator.le, '<='), YASM_EXPR_LE),
+ ((operator.__lt__, operator.lt, '<'), YASM_EXPR_LT),
+ ((operator.__mod__, operator.mod, '%'), YASM_EXPR_SIGNMOD),
+ ((operator.__mul__, operator.mul, '*'), YASM_EXPR_MUL),
+ ((operator.__neg__, operator.neg), YASM_EXPR_NEG),
+ ((operator.__not__, operator.not_, 'not'), YASM_EXPR_LNOT),
+ ((operator.__or__, operator.or_, '|'), YASM_EXPR_OR),
+ ((operator.__sub__, operator.sub, '-'), YASM_EXPR_SUB),
+ ((operator.__xor__, operator.xor, '^'), YASM_EXPR_XOR),
+ ]:
+ for op in ops:
+ __op[op] = operation
+
+del operator, op, ops, operation
+
+cdef object __make_expression(yasm_expr *expr):
+ return Expression(__pass_voidp(expr, Expression))
+
+cdef class Expression:
+ cdef yasm_expr *expr
+
+ def __cinit__(self, op, *args, **kwargs):
+ self.expr = NULL
+
+ if isinstance(op, Expression):
+ self.expr = yasm_expr_copy((<Expression>op).expr)
+ return
+ if PyCObject_Check(op):
+ self.expr = <yasm_expr *>__get_voidp(op, Expression)
+ return
+
+ cdef size_t numargs
+ cdef unsigned long line
+
+ op = __op.get(op, op)
+ numargs = len(args)
+ line = kwargs.get('line', 0)
+
+ if numargs == 0 or numargs > 2:
+ raise NotImplementedError
+ elif numargs == 2:
+ self.expr = yasm_expr_create(op, self.__new_item(args[0]),
+ self.__new_item(args[1]), line)
+ else:
+ self.expr = yasm_expr_create(op, self.__new_item(args[0]), NULL,
+ line)
+
+ cdef yasm_expr__item* __new_item(self, value) except NULL:
+ cdef yasm_expr__item *retval
+ if isinstance(value, Expression):
+ return yasm_expr_expr(yasm_expr_copy((<Expression>value).expr))
+ #elif isinstance(value, Symbol):
+ # return yasm_expr_sym((<Symbol>value).sym)
+ #elif isinstance(value, Register):
+ # return yasm_expr_reg((<Register>value).reg)
+ elif isinstance(value, FloatNum):
+ return yasm_expr_float(yasm_floatnum_copy((<FloatNum>value).flt))
+ elif isinstance(value, IntNum):
+ return yasm_expr_int(yasm_intnum_copy((<IntNum>value).intn))
+ else:
+ try:
+ intnum = IntNum(value)
+ except:
+ raise ValueError("Invalid item value type '%s'" % type(value))
+ else:
+ retval = yasm_expr_int((<IntNum>intnum).intn)
+ (<IntNum>intnum).intn = NULL
+ return retval
+
+ def __dealloc__(self):
+ if self.expr != NULL: yasm_expr_destroy(self.expr)
+
+ def simplify(self, calc_bc_dist=False):
+ self.expr = yasm_expr_simplify(self.expr, calc_bc_dist)
+
+ def extract_segoff(self):
+ cdef yasm_expr *retval
+ retval = yasm_expr_extract_segoff(&self.expr)
+ if retval == NULL:
+ raise ValueError("not a SEG:OFF expression")
+ return __make_expression(retval)
+
+ def extract_wrt(self):
+ cdef yasm_expr *retval
+ retval = yasm_expr_extract_wrt(&self.expr)
+ if retval == NULL:
+ raise ValueError("not a WRT expression")
+ return __make_expression(retval)
+
+ def get_intnum(self, calc_bc_dist=False):
+ cdef yasm_intnum *retval
+ retval = yasm_expr_get_intnum(&self.expr, calc_bc_dist)
+ if retval == NULL:
+ raise ValueError("not an intnum expression")
+ return __make_intnum(yasm_intnum_copy(retval))
+
diff --git a/tools/python-yasm/floatnum.pxi b/tools/python-yasm/floatnum.pxi
new file mode 100644
index 0000000..b7bfbc3
--- /dev/null
+++ b/tools/python-yasm/floatnum.pxi
@@ -0,0 +1,49 @@
+# Python bindings for Yasm: Pyrex input file for floatnum.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+cdef class FloatNum:
+    # Wrapper object holding a yasm_floatnum pointer.
+    cdef yasm_floatnum *flt
+    def __cinit__(self, value):
+        # value may be another FloatNum (its number is copied), a PyCObject
+        # (its pointer is taken as-is), a Python float (converted through
+        # str()), or anything else, which is handed to yasm_floatnum_create
+        # unchanged.
+        self.flt = NULL
+        if isinstance(value, FloatNum):
+            self.flt = yasm_floatnum_copy((<FloatNum>value).flt)
+            return
+        if PyCObject_Check(value): # should check Desc
+            self.flt = <yasm_floatnum *>PyCObject_AsVoidPtr(value)
+            return
+
+        # Stringify the value itself: str(float) would produce the text of
+        # the float type object instead of the number.
+        if isinstance(value, float): string = str(value)
+        else: string = value
+        self.flt = yasm_floatnum_create(string)
+
+    def __dealloc__(self):
+        if self.flt != NULL: yasm_floatnum_destroy(self.flt)
+
+    def __neg__(self):
+        # Negate a copy so that self is left untouched.
+        result = FloatNum(self)
+        yasm_floatnum_calc((<FloatNum>result).flt, YASM_EXPR_NEG, NULL)
+        return result
+    def __pos__(self): return self
+
+
diff --git a/tools/python-yasm/intnum.pxi b/tools/python-yasm/intnum.pxi
new file mode 100644
index 0000000..320ca1b
--- /dev/null
+++ b/tools/python-yasm/intnum.pxi
@@ -0,0 +1,170 @@
+# Python bindings for Yasm: Pyrex input file for intnum.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+cdef class IntNum
+
+cdef object __intnum_op_ex(object x, yasm_expr_op op, object y):
+ value = __intnum_op(x, op, y)
+ __error_check()
+ return value
+
+cdef object __intnum_op(object x, yasm_expr_op op, object y):
+ if isinstance(x, IntNum):
+ result = IntNum(x)
+ if y is None:
+ yasm_intnum_calc((<IntNum>result).intn, op, NULL)
+ else:
+ # Coerce to intnum if not already
+ if isinstance(y, IntNum):
+ rhs = y
+ else:
+ rhs = IntNum(y)
+ yasm_intnum_calc((<IntNum>result).intn, op, (<IntNum>rhs).intn)
+ return result
+ elif isinstance(y, IntNum):
+ # Reversed operation - x OP y still, just y is intnum, x isn't.
+ result = IntNum(x)
+ yasm_intnum_calc((<IntNum>result).intn, op, (<IntNum>y).intn)
+ return result
+ else:
+ raise NotImplementedError
+
+cdef object __make_intnum(yasm_intnum *intn):
+ return IntNum(__pass_voidp(intn, IntNum))
+
+cdef class IntNum:
+    # Wrapper object holding a yasm_intnum pointer; the Python numeric
+    # operators are mapped onto yasm_intnum_calc operations.
+    cdef yasm_intnum *intn
+
+    def __cinit__(self, value, base=None):
+        # value may be an IntNum (copied), a PyCObject (pointer taken via
+        # __get_voidp), a str parsed according to base (2, 8, 10, 16 or
+        # "nasm"; None means 10), or an int/long.  A str with any other
+        # base, or a value of any other type, raises ValueError.
+        cdef unsigned char buf[16]
+
+        self.intn = NULL
+
+        if isinstance(value, IntNum):
+            self.intn = yasm_intnum_copy((<IntNum>value).intn)
+            return
+        if PyCObject_Check(value):
+            self.intn = <yasm_intnum *>__get_voidp(value, IntNum)
+            return
+
+        if isinstance(value, str):
+            if base == 2:
+                self.intn = yasm_intnum_create_bin(value)
+            elif base == 8:
+                self.intn = yasm_intnum_create_oct(value)
+            elif base == 10 or base is None:
+                self.intn = yasm_intnum_create_dec(value)
+            elif base == 16:
+                self.intn = yasm_intnum_create_hex(value)
+            elif base == "nasm":
+                self.intn = yasm_intnum_create_charconst_nasm(value)
+            else:
+                raise ValueError("base must be 2, 8, 10, 16, or \"nasm\"")
+        elif isinstance(value, (int, long)):
+            # go through a 16-byte buffer so large values are supported
+            _PyLong_AsByteArray(long(value), buf, 16, 1, 1)
+            self.intn = yasm_intnum_create_sized(buf, 1, 16, 0)
+        else:
+            raise ValueError
+
+    def __dealloc__(self):
+        if self.intn != NULL: yasm_intnum_destroy(self.intn)
+
+    def __long__(self):
+        # Convert through the same 16-byte buffer layout used in __cinit__.
+        cdef unsigned char buf[16]
+        yasm_intnum_get_sized(self.intn, buf, 16, 128, 0, 0, 0)
+        return _PyLong_FromByteArray(buf, 16, 1, 1)
+
+    def __repr__(self):
+        return "IntNum(%d)" % self
+
+    def __int__(self): return int(self.__long__())
+    def __complex__(self): return complex(self.__long__())
+    def __float__(self): return float(self.__long__())
+
+    def __oct__(self): return oct(int(self.__long__()))
+    def __hex__(self): return hex(int(self.__long__()))
+
+    # Binary and unary operators return a new IntNum; division and modulo
+    # go through __intnum_op_ex so that yasm errors become exceptions.
+    def __add__(x, y): return __intnum_op(x, YASM_EXPR_ADD, y)
+    def __sub__(x, y): return __intnum_op(x, YASM_EXPR_SUB, y)
+    def __mul__(x, y): return __intnum_op(x, YASM_EXPR_MUL, y)
+    def __div__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNDIV, y)
+    def __floordiv__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNDIV, y)
+    def __mod__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNMOD, y)
+    def __neg__(self): return __intnum_op(self, YASM_EXPR_NEG, None)
+    def __pos__(self): return self
+    def __abs__(self):
+        if yasm_intnum_sign(self.intn) >= 0: return IntNum(self)
+        else: return __intnum_op(self, YASM_EXPR_NEG, None)
+    def __nonzero__(self): return not yasm_intnum_is_zero(self.intn)
+    def __invert__(self): return __intnum_op(self, YASM_EXPR_NOT, None)
+    def __lshift__(x, y): return __intnum_op(x, YASM_EXPR_SHL, y)
+    def __rshift__(x, y): return __intnum_op(x, YASM_EXPR_SHR, y)
+    def __and__(x, y): return __intnum_op(x, YASM_EXPR_AND, y)
+    def __or__(x, y): return __intnum_op(x, YASM_EXPR_OR, y)
+    def __xor__(x, y): return __intnum_op(x, YASM_EXPR_XOR, y)
+
+    cdef object __op(self, yasm_expr_op op, object x):
+        # In-place helper: applies op to self with x (coerced to IntNum
+        # when necessary) and returns self.
+        if isinstance(x, IntNum):
+            rhs = x
+        else:
+            rhs = IntNum(x)
+        yasm_intnum_calc(self.intn, op, (<IntNum>rhs).intn)
+        # Surface any error left pending by the calculation, as
+        # __intnum_op_ex does for the non-in-place division and modulo.
+        __error_check()
+        return self
+
+    def __iadd__(self, x): return self.__op(YASM_EXPR_ADD, x)
+    def __isub__(self, x): return self.__op(YASM_EXPR_SUB, x)
+    def __imul__(self, x): return self.__op(YASM_EXPR_MUL, x)
+    def __idiv__(self, x): return self.__op(YASM_EXPR_SIGNDIV, x)
+    def __ifloordiv__(self, x): return self.__op(YASM_EXPR_SIGNDIV, x)
+    # signed modulo, matching __mod__
+    def __imod__(self, x): return self.__op(YASM_EXPR_SIGNMOD, x)
+    def __ilshift__(self, x): return self.__op(YASM_EXPR_SHL, x)
+    def __irshift__(self, x): return self.__op(YASM_EXPR_SHR, x)
+    def __iand__(self, x): return self.__op(YASM_EXPR_AND, x)
+    def __ior__(self, x): return self.__op(YASM_EXPR_OR, x)
+    def __ixor__(self, x): return self.__op(YASM_EXPR_XOR, x)
+
+    def __cmp__(self, x):
+        # Returns the sign of (self - x).
+        cdef yasm_intnum *t
+        # Coerce x before copying self so that a failed conversion cannot
+        # leak the temporary copy.
+        if isinstance(x, IntNum):
+            rhs = x
+        else:
+            rhs = IntNum(x)
+        t = yasm_intnum_copy(self.intn)
+        yasm_intnum_calc(t, YASM_EXPR_SUB, (<IntNum>rhs).intn)
+        result = yasm_intnum_sign(t)
+        yasm_intnum_destroy(t)
+        return result
+
+    def __richcmp__(x, y, op):
+        # op is the rich-comparison code 0..5, mapped below to the yasm
+        # relational operator; the comparison is true when the computed
+        # intnum is non-zero.
+        cdef yasm_expr_op aop
+        if op == 0: aop = YASM_EXPR_LT
+        elif op == 1: aop = YASM_EXPR_LE
+        elif op == 2: aop = YASM_EXPR_EQ
+        elif op == 3: aop = YASM_EXPR_NE
+        elif op == 4: aop = YASM_EXPR_GT
+        elif op == 5: aop = YASM_EXPR_GE
+        else: raise NotImplementedError
+        v = __intnum_op(x, aop, y)
+        return bool(not yasm_intnum_is_zero((<IntNum>v).intn))
diff --git a/tools/python-yasm/pyxelator/cparse.py b/tools/python-yasm/pyxelator/cparse.py
new file mode 100755
index 0000000..53754f8
--- /dev/null
+++ b/tools/python-yasm/pyxelator/cparse.py
@@ -0,0 +1,819 @@
+#!/usr/bin/env python
+
+"""
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+"""
+
+import sys
+
+from lexer import Lexer
+from parse_core import Symbols, Parser
+import node as node_module
+
+
+class Node(node_module.Node):
+
+ def is_typedef(self):
+ for x in self:
+ if isinstance(x,Node):
+ if x.is_typedef():
+ return 1
+ return 0
+
+ #def explain(self):
+ #l = []
+ #for x in self:
+ #if isinstance(x,Node):
+ #l.append(x.explain())
+ #else:
+ #l.append(str(x))
+ #return string.join(l," ")
+ ##(self.__class__.__name__,string.join(l) )
+
+ def psource(self):
+ if hasattr(self,'lines'):
+ print "# "+string.join(self.lines,"\n# ")+"\n"
+
+
+###################################################################
+#
+###################################################################
+#
+
+
+class BasicType(Node):
+ " int char short etc. "
+ def __init__(self,name):
+ # the type keyword is handed to Node.__init__ as the node's only item
+ Node.__init__(self,name)
+
+class Qualifier(Node):
+ """ a qualifier keyword; the keyword is passed to Node.__init__ as the only
+ item and is also kept as .name
+ """
+ def __init__(self,name):
+ Node.__init__(self,name)
+ self.name=name
+
+class StorageClass(Node):
+ """ a storage-class keyword; the keyword is passed to Node.__init__ as the
+ only item and is also kept as .name
+ """
+ def __init__(self,name):
+ Node.__init__(self,name)
+ self.name=name
+
+class Typedef(StorageClass):
+ """ the 'typedef' keyword. __init__ calls Node.__init__ directly, so
+ StorageClass.__init__ (and its .name assignment) is not run.
+ """
+ def __init__(self,s='typedef'):
+ Node.__init__(self,s)
+ #def explain(self):
+ #return "type"
+
+class Ellipses(Node):
+ """ the '...' token; the text is passed to Node.__init__ as the only item
+ """
+ def __init__(self,s='...'):
+ Node.__init__(self,s)
+
+class GCCBuiltin(BasicType):
+ """ a BasicType subclass that adds nothing of its own; it only gives these
+ types a distinct class
+ """
+ pass
+
+
+class Identifier(Node):
+ """ a declared name; the name is passed to Node.__init__ as the first item
+ (callers read it back as ident[0])
+ """
+ def __init__(self,name="",*items):
+ # NOTE(review): "or 1" makes this condition always true, so the else
+ # branch below can never run.
+ if name or 1:
+ Node.__init__(self,name,*items)
+ else:
+ Node.__init__(self)
+ # presumably at method level (Declarator.parse reads ident.name) - the
+ # original indentation is not visible here; TODO confirm
+ self.name=name
+
+class Function(Node,Parser):
+ """ a function's parameter list; parse() appends one ParameterDeclaration
+ per parameter
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+
+ def parse(self,lexer,symbols):
+ # Called with lexer.tok on the first token after '(' (see
+ # Declarator.parse_declarator). Parameters are parsed against a new
+ # Symbols(symbols) object until ')' is the current token; anything other
+ # than ',' or ')' after a parameter is a parse error. The final
+ # get_token() presumably steps past the ')'.
+ symbols = Symbols(symbols)
+ # NOTE(review): 'args' is assigned but never read in this method
+ args = ''
+ #lexer.get_token()
+ if lexer.tok != ')':
+ if not lexer.tok:
+ self.parse_error(lexer)
+ #lexer.unget_token() # unget start of decl
+ while lexer.tok != ')':
+ node = ParameterDeclaration()
+ node.parse(lexer,symbols)
+ self.append( node )
+ if lexer.tok != ')' and lexer.tok != ',':
+ self.parse_error(lexer)
+ if lexer.tok == ',':
+ lexer.get_token()
+ lexer.get_token()
+
+
+class Pointer(Node):
+ """ marker node appended by Declarator.parse_declarator for each '*' it
+ pops off its token stack
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+
+class Array(Node,Parser):
+ """ one '[...]' suffix of a declarator; its single child is the text
+ between the brackets, or None when the brackets are empty
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+
+ def parse(self,lexer,symbols):
+ # The size expression is not evaluated: every token up to ']' is
+ # collected and the tokens are joined with single spaces.
+ lexer.get_token() # a number or ']'
+ # XX
+ # HACK HACK: constant c expressions can appear in here:
+ # eg. [ 15 * sizeof (int) - 2 * sizeof (void *) ]
+ # XX
+ toks = []
+ while lexer.tok != ']':
+ #self.append( lexer.kind )
+ toks.append( lexer.tok )
+ lexer.get_token()
+ child = " ".join(toks)
+ if child == "":
+ child = None
+ self.append( child )
+ lexer.get_token() # read past the ']'
+
+class Tag(Node):
+ """ the tag node appended by Compound.parse and Enum.parse: Tag(name), or
+ Tag() with no items when the struct/union/enum is anonymous
+ """
+ pass
+
+
+class Compound(Node,Parser):
+ "Struct or Union"
+
+ def __init__(self,*items,**kw):
+ Node.__init__(self,*items,**kw)
+
+ def parse(self,lexer,symbols):
+ # Called with lexer.tok just past the 'struct'/'union' keyword (see
+ # DeclarationSpecifiers.parse_spec). Reads an optional tag (it must
+ # start with '_' or a letter), appends a Tag node (empty when
+ # anonymous) and records the tag text in self.tag. If a '{' follows,
+ # StructDeclaration members are parsed against a new Symbols(symbols)
+ # object until '}' and added to this node.
+ symbols = Symbols(symbols)
+ tag = "" # anonymous
+ if lexer.tok != '{':
+ tag = lexer.tok
+ if not ( tag[0]=='_' or tag[0].isalpha() ):
+ self.parse_error(lexer ,"expected tag, got '%s'"%tag )
+ lexer.get_token()
+ if tag:
+ self.append(Tag(tag))
+ else:
+ self.append(Tag())
+ self.tag = tag
+ if lexer.tok == '{':
+ fieldlist = []
+ lexer.get_token()
+ if lexer.tok != '}':
+ if not lexer.tok: self.parse_error(lexer)
+ while lexer.tok != '}':
+ node = StructDeclaration()
+ node.parse(lexer,symbols)
+ fieldlist.append( node )
+ self += fieldlist
+ lexer.get_token()
+ if self.verbose:
+ print "%s.__init__() #<--"%(self)
+
+class Struct(Compound):
+ """ node built by DeclarationSpecifiers.parse_struct; all behaviour is
+ inherited from Compound
+ """
+ pass
+
+class Union(Compound):
+ """ node built by DeclarationSpecifiers.parse_union; all behaviour is
+ inherited from Compound
+ """
+ pass
+
+class Enum(Node,Parser):
+ """ an enum specifier: a Tag child followed by one Identifier per enumerator
+ """
+ def __init__(self,*items,**kw):
+ Node.__init__(self,*items,**kw)
+
+ def parse(self,lexer,symbols):
+ # Called with lexer.tok just past the 'enum' keyword. Reads an optional
+ # tag exactly as Compound.parse does, then, if a '{' follows, reads the
+ # enumerators. Each enumerator must be a token whose lexer.kind is None;
+ # it is rejected if already present in `symbols`, otherwise it is
+ # stored there and appended to this node. Initializer expressions
+ # ('= ...') are skipped token by token up to the next ',' or '}' and
+ # are not recorded.
+ tag = "" # anonymous
+ if lexer.tok != '{':
+ tag = lexer.tok
+ if not ( tag[0]=='_' or tag[0].isalpha() ):
+ self.parse_error(lexer ,"expected tag, got '%s'"%tag )
+ lexer.get_token()
+ if tag:
+ self.append(Tag(tag))
+ else:
+ self.append(Tag())
+ self.tag = tag
+ if lexer.tok == '{':
+ lexer.get_token()
+ if lexer.tok != '}': # XX dopey control flow
+ if not lexer.tok: # XX dopey control flow
+ self.parse_error(lexer) # XX dopey control flow
+ while lexer.tok != '}': # XX dopey control flow
+ if lexer.kind is not None:
+ self.expected_error(lexer ,"identifier" )
+ ident = Identifier(lexer.tok)
+ if symbols[ident[0]] is not None:
+ self.parse_error(lexer,"%s already defined."%ident[0])
+ symbols[ident[0]]=ident
+ self.append( ident )
+ lexer.get_token()
+ if lexer.tok == '=':
+ lexer.get_token()
+ # ConstantExpr
+ # XX hack hack XX
+ while lexer.tok!=',' and lexer.tok!='}':
+ lexer.get_token()
+# if type( lexer.kind ) is not int:
+# #self.parse_error(lexer ,"expected integer" )
+# # XX hack hack XX
+# while lexer.tok!=',' and lexer.tok!='}':
+# lexer.get_token()
+# else:
+# # put initializer into the Identifier
+# ident.append( lexer.kind )
+# lexer.get_token()
+ if lexer.tok != '}':
+ if lexer.tok != ',':
+ self.expected_error(lexer,"}",",")
+ lexer.get_token() # ','
+ lexer.get_token()
+ if self.verbose:
+ print "%s.__init__() #<--"%(self)
+
+
+
+class Declarator(Node,Parser):
+ """ one declarator: an optional Identifier followed by the Array, Function
+ and Pointer nodes (and other token kinds) that wrap it
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+ self.ident = None
+
+ def parse(self,lexer,symbols):
+ # Two phases: parse_identifier() pushes every token before the name
+ # onto `stack`; parse_declarator() then reads the suffixes after the
+ # name and unwinds the stack. self.name is '' when there is no
+ # identifier.
+ #Parser.parse_enter(self,lexer)
+ stack = []
+ # read up to identifier, pushing tokens onto stack
+ self.ident = self.parse_identifier(lexer,symbols,stack)
+ self.name = ''
+ if self.ident is not None:
+ self.append( self.ident )
+ self.name = self.ident.name
+ # now read outwards from identifier
+ self.parse_declarator(lexer,symbols,stack)
+ #Parser.parse_leave(self,lexer)
+
+ def parse_identifier(self,lexer,symbols,stack):
+ # Pushes (tok, kind) pairs onto `stack` while the current token has a
+ # non-None lexer.kind; the first token whose kind is None becomes the
+ # Identifier. Returns None when the declarator starts with ';' or the
+ # input runs out first.
+ if self.verbose:
+ print "%s.parse_identifier()"%self
+ ident = None
+ if lexer.tok != ';':
+ while lexer.tok and lexer.kind is not None:
+ stack.append( (lexer.tok, lexer.kind) )
+ lexer.get_token()
+ if lexer.tok:
+ ident = Identifier( lexer.tok )
+ #stack.append( (ident.name, ident) )
+ lexer.get_token()
+ if self.verbose:
+ print "%s.parse_identifier()=%s"%(self,repr(ident))
+ return ident
+
+ def parse_declarator(self,lexer,symbols,stack,level=0):
+ # Reads '[...]' suffixes (appending Array nodes) or a '(...)' suffix
+ # (appending a Function node), rejecting an array of functions and a
+ # function returning a function or an array. It then unwinds `stack`:
+ # a stacked '(' requires a matching ')' and recurses with level+1, a
+ # stacked '*' appends Pointer(), and any other entry appends its kind.
+ # `level` only controls the indentation of the verbose trace.
+ if self.verbose:
+ print " "*level+"%s.parse_declarator(%s) # --->"%\
+ (self,stack)
+ if lexer.tok == '[':
+ while lexer.tok == '[':
+ node = Array()
+ node.parse(lexer,symbols)
+ self.append(node)
+ if lexer.tok == '(':
+ self.parse_error(lexer ,"array of functions" )
+ elif lexer.tok == '(':
+ lexer.get_token()
+ node = Function()
+ node.parse(lexer,symbols)
+ self.append( node )
+ if lexer.tok == '(':
+ self.parse_error(lexer ,"function returns a function" )
+ if lexer.tok == '[':
+ self.parse_error(lexer ,"function returns an array" )
+ while stack:
+ tok, kind = stack[-1] # peek
+ if tok == '(':
+ stack.pop()
+ self.consume(lexer,')')
+ self.parse_declarator(lexer,symbols,stack,level+1)
+ elif tok == '*':
+ stack.pop()
+ self.append( Pointer() )
+ else:
+ tok, kind = stack.pop()
+ self.append( kind )
+ if self.verbose:
+ print " "*level+"%s.parse_declarator(%s) # <---"%\
+ (self,stack)
+
+
+class AbstractDeclarator(Declarator):
+ """ used in ParameterDeclaration; may lack an identifier """
+
+ def parse_identifier(self,lexer,symbols,stack):
+ # Like Declarator.parse_identifier, but the scan also stops at ')',
+ # ',' or '[' (a ';' is a parse error), and when no name is found an
+ # Identifier() with the default empty name is returned instead of None.
+ if self.verbose:
+ print "%s.parse_identifier()"%self
+ # NOTE(review): this None is overwritten by the next statement
+ ident = None
+ ident = Identifier()
+ while 1:
+ if lexer.tok == ';':
+ self.parse_error(lexer)
+ if lexer.tok == ')':
+ break
+ if lexer.tok == ',':
+ break
+ if lexer.tok == '[':
+ break
+ if lexer.kind is None:
+ #print "%s.new identifier"%self
+ ident = Identifier( lexer.tok )
+ lexer.get_token()
+ #stack.append( (ident.name, ident) )
+ break
+ stack.append( (lexer.tok, lexer.kind) )
+ lexer.get_token()
+ if self.verbose:
+ print "%s.parse_identifier()=%s"%(self,repr(ident))
+ return ident
+
+class FieldLength(Node):
+ """ holds the integer that StructDeclarator.parse reads after a ':'
+ """
+ pass
+
+class StructDeclarator(Declarator):
+ """ a declarator inside a struct/union body; may carry a ':' <integer>
+ suffix, or consist of that suffix alone
+ """
+ def parse(self,lexer,symbols):
+ # The ordinary declarator is parsed unless the current token is already
+ # ':'. If a ':' follows, the next token is converted with int() and
+ # appended as a FieldLength node; only a single integer token is
+ # handled, not a general constant expression.
+ if lexer.tok != ':':
+ Declarator.parse(self,lexer,symbols)
+ if lexer.tok == ':':
+ lexer.get_token()
+ # ConstantExpr
+ length = int(lexer.tok)
+ #print "length = ",length
+ self.append( FieldLength(length) )
+ lexer.get_token()
+
+class DeclarationSpecifiers(Node,Parser):
+ """
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+
+ def __eq__(self,other):
+ " unordered (set/bag) equality "
+ if not isinstance(other,Node):
+ return 0
+ for i in range(len(self)):
+ if not self[i] in other:
+ return 0
+ for i in range(len(other)):
+ if not other[i] in self:
+ return 0
+ return 1
+
+ def parse(self,lexer,symbols):
+ self.parse_spec(lexer,symbols)
+ self.reverse()
+
+ def parse_spec(self,lexer,symbols):
+ typespec = None
+ while lexer.tok:
+ if isinstance( lexer.kind, TypeAlias ) or\
+ isinstance( lexer.kind, BasicType ):
+ if typespec is not None:
+ self.parse_error(lexer ,"type already specified as %s"\
+ %typespec )
+ typespec=lexer.kind
+ self.append( lexer.kind )
+ lexer.get_token()
+ elif isinstance( lexer.kind, Qualifier ):
+ self.append( lexer.kind )
+ lexer.get_token()
+ elif isinstance( lexer.kind, StorageClass ):
+ self.append( lexer.kind )
+ lexer.get_token()
+ elif lexer.tok=='struct':
+ lexer.get_token()
+ self.parse_struct(lexer,symbols)
+ break #?
+ elif lexer.tok=='union':
+ lexer.get_token()
+ self.parse_union(lexer,symbols)
+ break #?
+ elif lexer.tok=='enum':
+ lexer.get_token()
+ self.parse_enum(lexer,symbols)
+ break #?
+ elif lexer.kind is None:
+ # identifier
+ break
+ else:
+ break
+
+ def parse_struct(self,lexer,symbols):
+ if self.verbose:
+ print "%s.parse_struct()"%(self)
+ node = Struct()
+ node.parse(lexer,symbols)
+ _node = None
+ if node.tag:
+ _node = symbols.get_tag( node.tag )
+ if _node is not None:
+ if not isinstance( _node, Struct ):
+ self.parse_error(lexer,"tag defined as wrong kind")
+ if len(node)>1:
+ if len(_node)>1:
+ self.parse_error(lexer,"tag already defined as %s"%_node)
+ #symbols.set_tag( node.tag, node )
+ #else:
+ # refer to the previously defined struct
+ ##node = _node
+ #node = _node.clone()
+ if 0:
+ # refer to the previously defined struct
+ if len(node)==1:
+ _node = symbols.deep_get_tag( node.tag )
+ if _node is not None:
+ node=_node
+ # But what about any future reference to the struct ?
+ if node.tag:
+ symbols.set_tag( node.tag, node )
+ self.append( node )
+
+ def parse_union(self,lexer,symbols):
+ if self.verbose:
+ print "%s.parse_union(%s)"%(self,node)
+ node = Union()
+ node.parse(lexer,symbols)
+ _node = None
+ if node.tag:
+ _node = symbols.get_tag( node.tag )
+ if _node is not None:
+ if not isinstance( _node, Union ):
+ self.parse_error(lexer,"tag %s defined as wrong kind"%repr(node.tag))
+ if len(node)>1:
+ if len(_node)>1:
+ self.parse_error(lexer,"tag already defined as %s"%_node)
+ #symbols.set_tag( node.tag, node )
+ #else:
+ #node = _node
+ #if len(node)==1:
+ #_node = symbols.deep_get_tag( node.tag )
+ #if _node is not None:
+ #node=_node
+ if node.tag:
+ symbols.set_tag( node.tag, node )
+ self.append( node )
+
+ def parse_enum(self,lexer,symbols):
+ if self.verbose:
+ print "%s.parse_enum(%s)"%(self,node)
+ node = Enum()
+ node.parse(lexer,symbols)
+ _node = None
+ if node.tag:
+ _node = symbols.get_tag( node.tag )
+ if _node is not None:
+ if not isinstance( _node, Enum ):
+ self.parse_error(lexer,"tag defined as wrong kind")
+ if len(node)>1:
+ if len(_node)>1:
+ self.parse_error(lexer,"tag already defined as %s"%_node)
+ #symbols.set_tag( node.tag, node )
+ #else:
+ #node = _node
+ #if len(node)==1:
+ #_node = symbols.deep_get_tag( node.tag )
+ #if _node is not None:
+ #node=_node
+ if node.tag:
+ symbols.set_tag( node.tag, node )
+ self.append( node )
+
+ def is_typedef(self):
+ return self.find(Typedef) is not None
+
+ def needs_declarator(self):
+ for node in self:
+ if isinstance( node, Struct ):
+ return False
+ if isinstance( node, Enum ):
+ return False
+ if isinstance( node, Union ):
+ return False
+ return True
+
+
+
+class TypeSpecifiers(DeclarationSpecifiers):
+ " used in ParameterDeclaration "
+
+ def parse_spec(self,lexer,symbols):
+ # Same scan as DeclarationSpecifiers.parse_spec, except that a
+ # StorageClass token is reported as a parse error instead of being
+ # appended.
+ typespec = None
+ while lexer.tok:
+ if isinstance( lexer.kind, TypeAlias ) or\
+ isinstance( lexer.kind, BasicType ):
+ if typespec is not None:
+ self.parse_error(lexer ,"type already specified as %s"\
+ %typespec )
+ typespec=lexer.kind
+ self.append( lexer.kind )
+ lexer.get_token()
+ elif isinstance( lexer.kind, Qualifier ):
+ self.append( lexer.kind )
+ lexer.get_token()
+ elif isinstance( lexer.kind, StorageClass ):
+ self.parse_error(lexer ,"'%s' cannot appear here"%lexer.tok )
+ elif lexer.tok=='struct':
+ lexer.get_token()
+ self.parse_struct(lexer,symbols)
+ break #?
+ elif lexer.tok=='union':
+ lexer.get_token()
+ self.parse_union(lexer,symbols)
+ break #?
+ elif lexer.tok=='enum':
+ lexer.get_token()
+ self.parse_enum(lexer,symbols)
+ break #?
+ elif lexer.kind is None:
+ # identifier
+ break
+ else:
+ break
+
+
+class Initializer(Node,Parser):
+ """ placeholder for an '=' initializer; parsing one is not supported
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+
+ def parse(self,lexer,symbols):
+ # always reports a "not implemented" parse error
+ self.parse_error(lexer,"not implemented")
+
+
+class TypeAlias(Node):
+ " typedefed things "
+
+ def __init__(self,name,decl=None):
+ # only the name becomes a node item; decl is kept as a plain attribute
+ Node.__init__(self,name)#,decl)
+ self.name=name
+ self.decl=decl
+
+
+class Declaration(Node,Parser):
+ """ one declaration: a DeclarationSpecifiers child followed by one
+ Declarator per declared name
+ """
+ def __init__(self,*items):
+ Node.__init__(self,*items)
+ #self.acted=False
+
+ def parse(self,lexer,symbols):
+ # Parses the specifiers, then a comma-separated list of declarators up
+ # to ';'. Each named declarator is either registered as a typedef with
+ # the lexer (lexer.mktypedef) or stored in `symbols`. A name already in
+ # `symbols` is accepted only when the two declarations compare equal
+ # once their Identifier nodes have been deleted from clones of both.
+ # A '{' is treated as the start of a function body, which is skipped
+ # by counting braces. An '=' goes to Initializer.parse, which always
+ # reports an error.
+ if not lexer.tok:
+ return
+ Parser.parse_enter(self,lexer)
+ declspec = DeclarationSpecifiers()
+ declspec.parse(lexer,symbols)
+ if len(declspec)==0:
+ if lexer.tok == ';':
+ lexer.get_token()
+ # empty declaration...
+ return
+ self.parse_error(lexer,
+ "expected specifiers, got '%s'"%lexer.tok )
+ self.append(declspec)
+ while 1:
+ decl = Declarator()
+ decl.parse(lexer,symbols)
+ if len(decl)==0:
+ if declspec.needs_declarator():
+ self.parse_error(lexer,
+ "expected declarator, got '%s'"%lexer.tok )
+ self.append(decl)
+ ident = decl.ident
+ if ident is not None:
+ #if len(ident):
+ # install symbol
+ node = symbols[ident[0]]
+ if node is not None:
+ # we allow functions to be defined (as same) again
+ #print node.deepstr(),'\n', self.deepstr()
+ _node = node.clone()
+ _node.delete(Identifier)
+ _self = self.clone()
+ _self.delete(Identifier)
+ if _node != _self:
+ self.parse_error(lexer,
+ "\n%s\n already defined as \n%s\n"%\
+ (self.deepstr(),node.deepstr()))
+ else:
+ if self.is_typedef():
+ #lexer.mktypedef( ident[0], self )
+ tp = TypeAlias(ident[0],decl)
+ lexer.mktypedef( ident[0], tp )
+ else:
+ symbols[ident[0]] = self
+ if lexer.tok == '=':
+ # parse initializer
+ lexer.get_token()
+ init = Initializer()
+ init.parse(lexer,symbols)
+ ident.append( init ) # as in Enum
+ #else: struct, union or enum
+ if lexer.tok == ';':
+ # no more declarators
+ break
+ if lexer.tok == '{':
+ # ! ahhh, function body !!!
+# sys.stderr.write(
+# "WARNING: function body found at line %s\n"%lexer.lno )
+ bcount = 1
+ while bcount:
+ lexer.get_brace_token()
+ if lexer.tok == '}':
+ bcount -= 1
+ if lexer.tok == '{':
+ bcount += 1
+ lexer.get_token()
+ Parser.parse_leave(self,lexer)
+ return
+ self.consume(lexer,',')
+ self.consume(lexer,';')
+ Parser.parse_leave(self,lexer)
+
+ def is_typedef(self):
+ # the first child must be the DeclarationSpecifiers node; the answer is
+ # delegated to it
+ spec=self[0]
+ assert isinstance(spec,DeclarationSpecifiers), self.deepstr()
+ return spec.is_typedef()
+
+
+class ParameterDeclaration(Declaration):
+ """ one function parameter: a TypeSpecifiers child followed by an
+ AbstractDeclarator child
+ """
+ def parse(self,lexer,symbols):
+ # A parameter whose identifier has a non-empty name is stored in
+ # `symbols`; a name that is already present there is a parse error.
+ typespec = TypeSpecifiers()
+ typespec.parse(lexer,symbols)
+ self.append(typespec)
+ decl = AbstractDeclarator()
+ decl.parse(lexer,symbols)
+ self.append(decl)
+ ident = decl.ident
+ if ident is not None and ident[0]:
+ node = symbols[ident[0]]
+ if node is not None:
+ self.parse_error(lexer,
+ "%s already defined as %s"%(ident,node))
+ else:
+ symbols[ident[0]] = self
+
+
+class StructDeclaration(Declaration):
+ """ one member declaration inside a struct/union body: a
+ DeclarationSpecifiers child followed by StructDeclarator children
+ """
+ def parse(self,lexer,symbols):
+ # Parses the specifiers, then a comma-separated list of
+ # StructDeclarators up to ';'. An empty declarator, a member name that
+ # is already in `symbols`, and a typedef specifier are all parse
+ # errors; otherwise each named member is stored in `symbols`.
+ if not lexer.tok:
+ return
+ declspec = DeclarationSpecifiers()
+ declspec.parse(lexer,symbols)
+ self.append(declspec)
+ if len(declspec)==0:
+ if lexer.tok == ';':
+ lexer.get_token()
+ # empty declaration...
+ return
+ self.parse_error(lexer,
+ "expected specifiers, got '%s'"%lexer.tok )
+ while 1:
+ decl = StructDeclarator()
+ decl.parse(lexer,symbols)
+ if len(decl)==0:
+ self.parse_error(lexer,
+ "expected declarator, got '%s'"%lexer.tok )
+ self.append(decl)
+ ident = decl.ident
+ if ident is not None:
+ node = symbols[ident[0]]
+ if node is not None:
+ self.parse_error(lexer ,
+ "%s already defined as %s"%(ident,node))
+ else:
+ if declspec.is_typedef():
+ self.parse_error(lexer,"typedef in struct or union")
+ else:
+ symbols[ident[0]] = self
+ if lexer.tok == ';':
+ break
+ self.consume(lexer,',')
+ self.consume(lexer,';')
+
+
+class TransUnit(Node,Parser):
+ """ top-level node: the list of Declaration nodes parsed from one input
+ """
+ def __init__(self,*items,**kw):
+ Node.__init__(self,*items,**kw)
+
+ def parse(self,s,verbose=0):
+ # Creates a new Symbols() and a Lexer over s, then parses Declaration
+ # nodes until the lexer has no current token. A Declaration that is
+ # falsy after parsing is not appended.
+ self.symbols = Symbols()
+ self.lexer = Lexer(s,verbose=verbose) #,host=__module__)
+ node = None
+ while self.lexer.tok:
+ node=Declaration()
+ node.parse(self.lexer,self.symbols)
+ #sys.stderr.write( "# line %s\n"%self.lexer.lno )
+ if node:
+ self.append(node)
+ #node.psource()
+ #print node.deepstr(),'\n'
+ #node.act()
+
+ def strip(self,files):
+ " leave only the declarations from <files> "
+ # children are removed in place; each child's .file attribute is tested
+ i=0
+ while i<len(self):
+ if self[i].file in files:
+ i=i+1
+ else:
+ self.pop(i)
+
+ def strip_filter(self,cb):
+ " leave only the declarations such that cb(file) "
+ # children are removed in place; cb receives each child's .file
+ i=0
+ while i<len(self):
+ if cb(self[i].file):
+ i=i+1
+ else:
+ self.pop(i)
+
+ def assert_no_dups(self):
+ # asserts that no object (compared by id()) is yielded twice by
+ # self.nodes()
+ check={}
+ for node in self.nodes():
+ assert not check.has_key(id(node))
+ check[id(node)]=1
+
+
+
+# Optional psyco acceleration. NOTE(review): unless a module called NoModule
+# is importable, the first import raises ImportError, so the no-op _psyco
+# stand-in below is what ends up bound to the name `psyco` - presumably a
+# deliberate way of switching psyco off; confirm before removing.
+try:
+ import NoModule
+ import psyco
+ from psyco.classes import *
+except ImportError:
+ class _psyco:
+ def jit(self): pass
+ def bind(self, f): pass
+ def proxy(self, f): return f
+ psyco = _psyco()
+psyco.bind( Lexer.get_token )
+psyco.bind( Node )
+
+def run0():
+ # Manual driver: takes the input from sys.argv[1] when given, otherwise
+ # from stdin, parses it with a verbose TransUnit and calls
+ # node.act(1,1,1). The profiling branch is disabled by "if 0".
+ # NOTE(review): the local 'verbose' is assigned but never read.
+ verbose = 0
+ if not sys.argv[1:]:
+ s = sys.stdin.read()
+ if sys.argv[1:]:
+ s = sys.argv[1]
+ #if sys.argv[2:]:
+ #verbose = int(sys.argv[2])
+ if 0:
+ import profile
+ profile.run('TransUnit(s)','prof.out')
+ import pstats
+ p=pstats.Stats('prof.out')
+ p.strip_dirs().sort_stats(-1).print_stats()
+ else:
+ node = TransUnit(verbose = 1 )
+ node.parse(s)
+ node.act(1,1,1)
+
+def run1():
+ # Manual check: parses the fixed string below with an AbstractDeclarator
+ # and prints the resulting tree via deepstr().
+ cstr = "char *(*)() ,"
+ node = AbstractDeclarator()
+ node.parse( Lexer(cstr,True), Symbols() )
+ print node.deepstr()
+
+# Running this file as a script does nothing; run0() and run1() above are
+# not called from here.
+if __name__=="__main__":
+ pass
+
+
diff --git a/tools/python-yasm/pyxelator/genpyx.py b/tools/python-yasm/pyxelator/genpyx.py
new file mode 100755
index 0000000..3f2a4cc
--- /dev/null
+++ b/tools/python-yasm/pyxelator/genpyx.py
@@ -0,0 +1,530 @@
+#!/usr/bin/env python
+""" genpyx.py - parse c declarations
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+This is a module of mixin classes for ir.py .
+
+Towards the end of ir.py our global class definitions
+are remapped to point to the class definitions in ir.py .
+So, for example, when we refer to Node we get ir.Node .
+
+"""
+
+import sys
+from datetime import datetime
+
+# XX use this Context class instead of all those kw dicts !! XX
+class Context(object):
+ " just a record (struct) "
+ def __init__( self, **kw ):
+ # every keyword argument becomes an attribute
+ for key, value in kw.items():
+ setattr( self, key, value )
+ def __getattr__( self, name ):
+ # __getattr__ only runs when normal lookup fails, so any attribute that
+ # was never set reads as None instead of raising AttributeError
+ return None # ?
+ def __getitem__( self, name ):
+ # context['x'] is the same as context.x
+ return getattr(self, name)
+
+class OStream(object):
+ def __init__( self, filename=None ):
+ self.filename = filename
+ self.tokens = []
+ self._indent = 0
+ def put( self, token="" ):
+ assert type(token) is str
+ self.tokens.append( token )
+ def startln( self, token="" ):
+ assert type(token) is str
+ self.tokens.append( ' '*self._indent + token )
+ def putln( self, ln="" ):
+ assert type(ln) is str
+ self.tokens.append( ' '*self._indent + ln + '\n')
+ def endln( self, token="" ):
+ assert type(token) is str
+ self.tokens.append( token + '\n')
+ def indent( self ):
+ self._indent += 1
+ def dedent( self ):
+ self._indent -= 1
+ assert self._indent >= 0, self._indent
+ def join( self ):
+ return ''.join( self.tokens )
+ def close( self ):
+ s = ''.join( self.tokens )
+ f = open( self.filename, 'w' )
+ f.write(s)
+
+#
+###############################################################################
+#
+
+class Node(object):
+ """
+ tree structure
+ """
+ # class-wide counter behind get_unique_id()
+ _unique_id = 0
+ def get_unique_id(cls):
+ # increments and returns the counter stored on Node itself, so all
+ # subclasses share one sequence
+ Node._unique_id += 1
+ return Node._unique_id
+ get_unique_id = classmethod(get_unique_id)
+
+# XX toks: use a tree of tokens: a list that can be push'ed and pop'ed XX
+ def pyxstr(self,toks=None,indent=0,**kw):
+ """
+ Build a list of tokens; return the joined tokens string
+ """
+ # Child Nodes add their own tokens to the shared list; any other item
+ # is inserted at the front of the list as str(item) plus a space.
+ if toks is None:
+ toks = []
+ for x in self:
+ if isinstance(x,Node):
+ x.pyxstr(toks, indent, **kw)
+ else:
+ toks.insert(0,str(x)+' ')
+ s = ''.join(toks)
+ return s
+
+#
+#################################################
+
+class Named(object):
+ "has a .name property"
+ pass
+
+class BasicType(object):
+ "float double void char int"
+ pass
+
+class Qualifier(object):
+ "register signed unsigned short long const volatile inline"
+ def pyxstr(self,toks=None,indent=0,**kw):
+ # The qualifier text (self[0]) is inserted at the front of toks, except
+ # for const, volatile, inline and register, which are left out of the
+ # output.
+ if toks is None:
+ toks = []
+ x = self[0]
+ if x not in ( 'const','volatile','inline','register'): # ignore these
+ toks.insert(0,str(x)+' ')
+ s = ''.join(toks)
+ return s
+
+class StorageClass(object):
+ "extern static auto"
+ def pyxstr(self,toks=None,indent=0,**kw):
+ # storage classes contribute nothing to the output; toks is left
+ # untouched
+ return ""
+
+class Ellipses(object):
+ "..."
+ pass
+
+class GCCBuiltin(BasicType):
+ "things with __builtin prefix"
+ pass
+
+class Identifier(object):
+ """ mixin that emits the identifier's name
+ """
+ def pyxstr(self,toks=None,indent=0,**kw):
+ # a non-empty self.name is appended to the end of toks; the return
+ # value is the tokens joined with single spaces
+ if toks is None:
+ toks=[]
+ if self.name:
+ toks.append( self.name )
+ return " ".join(toks)
+
+class TypeAlias(object):
+ """
+ typedefed things, eg. size_t
+ """
+ def pyxstr(self,toks=None,indent=0,cprefix="",**kw):
+ # Same traversal as Node.pyxstr, except that each non-Node item is
+ # prefixed with cprefix (when one is given) before it is inserted at
+ # the front of toks.
+ if toks is None:
+ toks = []
+ for x in self:
+ if isinstance(x,Node):
+ x.pyxstr(toks, indent, cprefix=cprefix, **kw)
+ else:
+ s = str(x)+' '
+ if cprefix:
+ s = cprefix+s
+ toks.insert(0,s)
+ s = ''.join(toks)
+ return s
+
+class Function(object):
+ """ mixin that emits a function's argument list and then its return type
+ """
+ def pyxstr(self,toks,indent=0,**kw):
+ # The leading Declarator children are the arguments: each non-void one
+ # is rendered on its own and the results are appended to toks as
+ # "(a, b, ...)". The remaining children then add their tokens to toks.
+ # NOTE(review): the first loop indexes self[i] without a bound check,
+ # so it relies on at least one non-Declarator child following the
+ # arguments.
+ #print '%s.pyxstr(%s)'%(self,toks)
+ _toks=[]
+ assert len(self)
+ i=0
+ while isinstance(self[i],Declarator):
+ if not self[i].is_void():
+ _toks.append( self[i].pyxstr(indent=indent, **kw) )
+ i=i+1
+ toks.append( '(%s)'% ', '.join(_toks) )
+ while i<len(self):
+ self[i].pyxstr(toks, indent=indent, **kw)
+ i=i+1
+ return " ".join(toks)
+
+class Pointer(object):
+ """ mixin that emits a '*' in front of what has been emitted so far
+ """
+ def pyxstr(self,toks,indent=0,**kw):
+ # '*' is inserted at the front of toks. When the first child is a
+ # Function or an Array, the tokens are additionally wrapped in
+ # parentheses before the children are rendered with Node.pyxstr.
+ assert len(self)
+ node=self[0]
+ toks.insert(0,'*')
+ if isinstance(node,Function):
+ toks.insert(0,'(')
+ toks.append(')')
+ elif isinstance(node,Array):
+ toks.insert(0,'(')
+ toks.append(')')
+ return Node.pyxstr(self,toks,indent, **kw)
+
+class Array(object):
+ """ mixin that emits an array suffix: '[size]' or '[]'
+ """
+ def pyxstr(self,toks,indent=0,**kw):
+ # '[size]' is appended only when self.size converts with int(); a size
+ # of None, or one that int() rejects, is emitted as '[]'. The children
+ # other than the last one are then rendered through a temporary Node.
+ # NOTE(review): the bare except hides every exception raised by
+ # int(self.size), not just conversion failures.
+ if self.size is None:
+ toks.append('[]')
+ else:
+ try:
+ int(self.size)
+ toks.append('[%s]'%self.size)
+ except:
+ toks.append('[]')
+ return Node( *self[:-1] ).pyxstr( toks,indent, **kw )
+
+class Tag(object):
+ " the tag of a Struct, Union or Enum "
+ pass
+
+class Taged(object):
+ "Struct, Union or Enum "
+ pass
+
+class Compound(Taged):
+ "Struct or Union"
+ def pyxstr(self,_toks=None,indent=0,cprefix="",shadow_name=True,**kw):
+ # Emits either a definition ("struct tag" / "union tag", followed by
+ # ':' and the members when there are any) or just a reference to the
+ # tag. A reference is emitted when kw['names'] already holds a node for
+ # this tag and either that node has members or this one has none. With
+ # a cprefix the tag is prefixed, and in a definition with shadow_name
+ # set it is written in the form: prefixed-name "c-name".
+ # The resulting tokens are moved to the front of _toks.
+ # NOTE(review): 'descr' is only assigned for Struct and Union
+ # instances, and names.get(self.tag.name, ...) dereferences self.tag
+ # even though the branch above allows it to be false - confirm both are
+ # guaranteed by the callers.
+ if _toks is None:
+ _toks=[]
+ names = kw.get('names',{})
+ kw['names'] = names
+ tag_lookup = kw.get('tag_lookup')
+ if self.tag:
+ tag=self.tag.name
+ else:
+ tag = ''
+ if isinstance(self,Struct):
+ descr = 'struct'
+ elif isinstance(self,Union):
+ descr = 'union'
+ _node = names.get(self.tag.name,None)
+ if ( _node is not None and _node.has_members() ) or \
+ ( _node is not None and not self.has_members() ):
+ descr = '' # i am not defining myself here
+ #print "Compound.pyxstr", tag
+ #print self.deepstr()
+ if descr:
+ if cprefix and shadow_name:
+ tag = '%s%s "%s"'%(cprefix,tag,tag)
+ elif cprefix:
+ tag = cprefix+tag
+ toks = [ descr+' '+tag ] # struct foo
+ if self.has_members():
+ toks.append(':\n')
+ for decl in self[1:]: # XX self.members
+ toks.append( decl.pyxstr(indent=indent+1, cprefix=cprefix, shadow_name=shadow_name, **kw)+"\n" ) # shadow_name = False ?
+ #elif not tag_lookup.get( self.tag.name, self ).has_members():
+ # define empty struct here, it's the best we're gonna get
+ #pass
+ else:
+ if cprefix: # and shadow_name:
+ tag = cprefix+tag
+ toks = [ ' '+tag+' ' ] # foo
+ while toks:
+ _toks.insert( 0, toks.pop() )
+ return "".join( _toks )
+
+class Struct(Compound):
+ """ Compound.pyxstr emits 'struct' for instances of this class
+ """
+ pass
+
+class Union(Compound):
+ """ Compound.pyxstr emits 'union' for instances of this class
+ """
+ pass
+
+
+class Enum(Taged):
+ """ mixin that emits an enum definition or a reference to its tag
+ """
+ def pyxstr(self,_toks=None,indent=0,cprefix="",shadow_name=True,**kw):
+ # The definition-versus-reference rule is the same as in
+ # Compound.pyxstr. In a definition, only members whose name is not yet
+ # in kw['names'] are listed, and each listed member is recorded there;
+ # if none remain, a single "pass" line is emitted. With a cprefix and
+ # shadow_name set, a clone of the member is renamed to the form:
+ # prefixed-name "c-name".
+ # NOTE(review): unlike Compound.pyxstr, the tag itself is not given the
+ # cprefix here, and names.get(self.tag.name, ...) dereferences self.tag
+ # even though the branch above allows it to be false.
+ if _toks is None:
+ _toks=[]
+ names = kw.get('names',{})
+ kw['names'] = names
+ if self.tag:
+ tag=self.tag.name
+ else:
+ tag = ''
+ _node = names.get(self.tag.name,None)
+ if ( _node is not None and _node.has_members() ) or \
+ ( _node is not None and not self.has_members() ):
+ descr = '' # i am not defining myself here
+ else:
+ descr = 'enum'
+ if descr:
+ #if not names.has_key(self.tag.name):
+ toks = [ descr+' '+tag ] # enum foo
+ toks.append(':\n')
+ idents = [ ident for ident in self.members if ident.name not in names ]
+ for ident in idents:
+ if cprefix and shadow_name:
+ ident = ident.clone()
+ ident.name = '%s%s "%s"' % ( cprefix, ident.name, ident.name )
+ #else: assert 0
+ toks.append( ' '+' '*indent + ident.pyxstr(**kw)+"\n" )
+ names[ ident.name ] = ident
+ if not idents:
+ # empty enum def'n !
+ #assert 0 # should be handled by parents...
+ toks.append( ' '+' '*indent + "pass\n" )
+ else:
+ toks = [ ' '+tag+' ' ] # foo
+ while toks:
+ _toks.insert( 0, toks.pop() )
+ return "".join( _toks )
+
+class Declarator(object):
+ # Mixin that turns a declarator into pyrex source text.
+ def is_pyxnative( self ):
+ # Evaluated on self.cbasetype(): False for void, True for primitive
+ # types and for enums, False for everything else.
+ # pyrex handles char* too
+ # but i don't know if we should make this the default
+ # sometimes we want to send a NULL, so ... XX
+ self = self.cbasetype() # WARNING: cbasetype may be cached
+ if self.is_void():
+ return False
+ if self.is_primative():
+ return True
+ if self.enum:
+ return True
+ #pointer = None
+ #if self.pointer:
+ #pointer = self.pointer
+ #elif self.array:
+ #pointer = self.array
+ #if pointer and pointer.spec:
+ #spec = pointer.spec
+ #if BasicType("char") in spec and not Qualifier("unsigned") in spec:
+ # char*, const char*
+ ##print self.deepstr()
+ #return True
+ return False
+
+ def _pyxstr( self, toks, indent, cprefix, use_cdef, shadow_name, **kw ):
+ " this is the common part of pyxstr that gets called from both Declarator and Typedef "
+ # Walks self.nodes() and, as a side effect on toks and kw['names']:
+ # - gives anonymous Taged nodes a generated "_anon_<id>" tag name and
+ # emits a 'cdef' definition for a tagged node that is not yet in
+ # names or that has members;
+ # - emits a placeholder struct plus ctypedef for a GCCBuiltin whose
+ # name is not yet in names;
+ # - replaces every child that is exactly an Array without a size by a
+ # Pointer initialised from it.
+ # Callers pass a clone of themselves, so the tree being mutated is the
+ # clone.
+ names = kw.get('names',{}) # what names have been defined ?
+ kw['names']=names
+ for node in self.nodes(): # depth-first
+ if isinstance(node,Taged):
+ #print "Declarator.pyxstr", node.cstr()
+ if not node.tag.name:
+ node.tag.name = "_anon_%s" % Node.get_unique_id()
+ _node = names.get(node.tag.name,None)
+ #tag_lookup = kw.get('tag_lookup')
+ #other = tag_lookup.get(node.tag.name, node)
+ #if ((_node is None and (not isinstance(other,Compound) or not other.has_members()))
+ # or node.has_members()):
+ if _node is None or node.has_members():
+ # either i am not defined at all, or this is my _real_ definition
+ # emit def'n of this node
+ #if isinstance(self,Typedef):
+ #toks.append( ' '*indent + 'ctypedef ' + node.pyxstr(indent=indent, cprefix=cprefix, shadow_name=shadow_name, **kw).strip() )
+ #else:
+ toks.append( ' '*indent + 'cdef ' + node.pyxstr(indent=indent, cprefix=cprefix, shadow_name=shadow_name, **kw).strip() )
+ names[ node.tag.name ] = node
+ elif isinstance(node,GCCBuiltin) and node[0] not in names:
+ #toks.append( ' '*indent + 'ctypedef long ' + node.pyxstr(indent=indent, **kw).strip() + ' # XX ??' ) # XX ??
+ toks.append( ' '*indent + 'struct __unknown_builtin ' )
+ toks.append( ' '*indent + 'ctypedef __unknown_builtin ' + node.pyxstr(indent=indent, **kw).strip() )
+ names[ node[0] ] = node
+ for idx, child in enumerate(node):
+ if type(child)==Array and not child.has_size():
+ # mutate this mystery array into a pointer XX method: Array.to_pointer()
+ node[idx] = Pointer()
+ node[idx].init_from( child ) # warning: shallow init
+ node[idx].pop() # pop the size element
+
+ def pyxstr(self,toks=None,indent=0,cprefix="",use_cdef=True,shadow_name=True,**kw):
+ " note: i do not check if my name is already in 'names' "
+ # Works on a clone of self and ignores the toks argument (a fresh list
+ # is used). Emits the definitions found by _pyxstr, records this
+ # declarator in kw['names'] if its name is not there yet, and then, if
+ # it has an identifier, emits its own declaration. That line is
+ # commented out with '#' when the name is in python_kws, is prefixed
+ # with 'cdef ' unless use_cdef is false, and uses the form
+ # prefixed-name "c-name" when cprefix, use_cdef and shadow_name are all
+ # set.
+ self = self.clone() # <----- NOTE
+ toks=[]
+ names = kw.get('names',{}) # what names have been defined ?
+ kw['names']=names
+
+ self._pyxstr( toks, indent, cprefix, use_cdef, shadow_name, **kw )
+
+ if self.name and not names.has_key( self.name ):
+ names[ self.name ] = self
+ if self.identifier is not None:
+ comment = ""
+ if self.name in python_kws:
+ comment = "#"
+ if cprefix and use_cdef and shadow_name:
+ # When we are defining this guy, we refer to it using the pyrex shadow syntax.
+ self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name )
+ cdef = 'cdef '
+ if not use_cdef: cdef = '' # sometimes we don't want the cdef (eg. in a cast)
+ # this may need shadow_name=False:
+ toks.append( ' '*indent + comment + cdef + Node.pyxstr(self,indent=indent, cprefix=cprefix, **kw).strip() ) # + "(cprefix=%s)"%cprefix)
+ #else: i am just a struct def (so i already did that) # huh ?? XX bad comment
+ return ' \n'.join(toks)
+
+ def pyxsym(self, ostream, names=None, tag_lookup=None, cprefix="", modname=None, cobjects=None):
+ # Writes two lines to ostream: a comment holding self.cstr(), and an
+ # assignment that builds the adaptor object named by
+ # self.pyx_adaptor_name(cobjects) from the address of the prefixed C
+ # symbol. Returns ostream. names, tag_lookup and modname are not used.
+ assert self.name is not None, self.deepstr()
+ ostream.putln( '# ' + self.cstr() )
+# This cdef is no good: it does not expose a python object
+# and we can't reliably set a global var
+ #ostream.putln( 'cdef %s %s' % ( self.pyx_adaptor_decl(cobjects), self.name ) ) # _CObject
+ #ostream.putln( '%s = %s()' % (self.name, self.pyx_adaptor_name(cobjects)) )
+ #ostream.putln( '%s.p = <void*>&%s' % (self.name, cprefix+self.name) )
+ ## expose a python object:
+ #ostream.putln( '%s.%s = %s' % (modname,self.name, self.name) )
+ ostream.putln( '%s = %s( addr = <long>&%s )' % (self.name, self.pyx_adaptor_name(cobjects), cprefix+self.name) )
+ return ostream
+
+
+class Typedef(Declarator):
+ # Mixin that emits a typedef as a pyrex 'ctypedef' line.
+ def pyxstr(self,toks=None,indent=0,cprefix="",use_cdef=True,shadow_name=True,**kw): # shadow_name=True
+ " warning: i do not check if my name is already in 'names' "
+ # Works on a clone of self and ignores the toks argument. Emits the
+ # definitions found by _pyxstr and records this typedef in kw['names']
+ # if its name is not there yet. The 'ctypedef' line is then emitted
+ # unless the typedef names a tagged type whose tag equals the typedef
+ # name; it is commented out with '#' when the name is in python_kws.
+ # shadow_name must be True (asserted); use_cdef is only forwarded to
+ # _pyxstr.
+ assert shadow_name == True
+ self = self.clone() # <----- NOTE
+ toks=[]
+ names = kw.get('names',{}) # what names have been defined ?
+ kw['names']=names
+
+ #if self.tagged and not self.tagged.tag.name:
+ ## "typedef struct {...} foo;" => "typedef struct foo {...} foo;"
+ ## (to be emitted in the node loop below, and suppressed in the final toks.append)
+ #self.tagged.tag = Tag( self.name ) # this is how pyrex does it: tag.name == self.name
+ # XX that doesn't work (the resulting c fails to compile) XX
+
+ self._pyxstr( toks, indent, cprefix, use_cdef, shadow_name, **kw )
+
+ #print self.deepstr()
+ if self.name and not names.has_key( self.name ):
+ names[ self.name ] = self
+ if not (self.tagged and self.name == self.tagged.tag.name):
+ comment = ""
+ if self.name in python_kws:
+ comment = "#"
+ #if cprefix:
+ # self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name ) # XX pyrex can't do this
+ if cprefix: # shadow_name=True
+ # My c-name gets this prefix. See also TypeAlias.pyxstr(): it also prepends the cprefix.
+ self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name )
+ toks.append( ' '*indent + comment + 'ctypedef ' + Node.pyxstr(self,indent=indent, cprefix=cprefix, **kw).strip() )
+ return ' \n'.join(toks)
+
+
+class AbstractDeclarator(Declarator):
+ """ used in Function; may lack an identifier """
+ def pyxstr(self,toks=None,indent=0,**kw):
+ # A name that is in python_kws is renamed in place with a leading '_'
+ # (this mutates self). The declarator is then rendered with
+ # Node.pyxstr and stripped of surrounding whitespace.
+ if self.name in python_kws:
+ # Would be better to do this in __init__, but our subclass doesn't call our __init__.
+ self.name = '_' + self.name
+ #return ' '*indent + Node.pyxstr(self,toks,indent, **kw).strip()
+ return Node.pyxstr(self,toks,indent, **kw).strip()
+
+
+class FieldLength(object):
+ """ mixin for the field-length node; it contributes nothing to the output
+ """
+ def pyxstr(self,toks,indent,**kw):
+ # deliberately empty: toks is left unchanged and None is returned
+ pass
+
+
+class StructDeclarator(Declarator): # also used in Union
+ """ mixin that emits one member line of a struct or union definition
+ """
+ def pyxstr(self,toks=None,indent=0,**kw):
+ # The member is rendered with Node.pyxstr, stripped, and given the
+ # leading indentation; it is commented out with '#' when its name is
+ # in python_kws.
+ comment = ""
+ if self.name in python_kws:
+ comment = "#"
+ return ' '*indent + comment + Node.pyxstr(self,toks,indent, **kw).strip()
+
+# The five classes below define no methods of their own. Per the module
+# docstring they are mixins for the classes of the same name in ir.py.
+class DeclarationSpecifiers(object):
+ """ empty mixin
+ """
+ pass
+
+class TypeSpecifiers(DeclarationSpecifiers):
+ """ empty mixin
+ """
+ pass
+
+class Initializer(object):
+ """ empty mixin
+ """
+ pass
+
+class Declaration(object):
+ """ empty mixin
+ """
+ pass
+
+class ParameterDeclaration(Declaration):
+ """ empty mixin
+ """
+ pass
+
+class StructDeclaration(Declaration):
+ """ empty mixin
+ """
+ pass
+
+class TransUnit(object):
+ """
+ Top level node.
+ """
+ def pyx_decls(self, filenames, modname, macros = {}, names = {}, func_cb=None, cprefix="", **kw):
+ # PART 1: emit extern declarations
+ ostream = OStream()
+ now = datetime.today()
+ ostream.putln( now.strftime('# Code generated by pyxelator on %x at %X') + '\n' )
+ ostream.putln("# PART 1: extern declarations")
+ for filename in filenames:
+ ostream.putln( 'cdef extern from "%s":\n pass\n' % filename )
+ ostream.putln( 'cdef extern from *:' )
+ file = None # current file
+ for node in self:
+ ostream.putln('')
+ ostream.putln(' # ' + node.cstr() )
+ assert node.marked
+ comment = False
+ if node.name and node.name in names:
+ comment = True # redeclaration
+ #ostream.putln( node.deepstr( comment=True ) )
+ s = node.pyxstr(indent=1, names=names, tag_lookup = self.tag_lookup, cprefix=cprefix, **kw)
+ if s.split():
+ if comment:
+ s = "#"+s.replace( '\n', '\n#' ) + " # redeclaration "
+ if node.file != file:
+ file = node.file
+ #ostream.putln( 'cdef extern from "%s":' % file )
+ ostream.putln( ' # "%s"' % file )
+ ostream.putln( s )
+ ostream.putln('\n')
+ #s = '\n'.join(toks)
+ return ostream.join()
+
+# XX warn when we find a python keyword XX
+python_kws = """
+break continue del def except exec finally pass print raise
+return try global assert lambda yield
+for while if elif else and in is not or import from """.split()
+python_kws = dict( zip( python_kws, (None,)*len(python_kws) ) )
+
+
diff --git a/tools/python-yasm/pyxelator/ir.py b/tools/python-yasm/pyxelator/ir.py
new file mode 100755
index 0000000..cfa9c02
--- /dev/null
+++ b/tools/python-yasm/pyxelator/ir.py
@@ -0,0 +1,1163 @@
+#!/usr/bin/env python
+""" ir.py - parse c declarations
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+import sys
+#import cPickle as pickle
+import pickle
+
+#from lexer import Lexer
+from parse_core import Symbols #, Parser
+import node as node_module
+import cparse
+import genpyx
+
+class Node(genpyx.Node, node_module.Node):
+ """
+ tree structure
+ """
+ def __init__( self, *args, **kw ):
+ node_module.Node.__init__( self, *args, **kw )
+ self._marked = False
+ def get_marked( self ):
+ return self._marked
+ def set_marked( self, marked ):
+# if marked:
+# print "MARK", self
+ self._marked = marked
+ marked = property( get_marked, set_marked )
+
+# def __getstate__( self ):
+# return self.__class__, tuple( [ item.__getstate__() for item in self ] )
+# def __setstate__( self, state ):
+# cls, states = state
+# states = list(states)
+# for idx, state in enumerate(states):
+# items[idx] = items[idx].__setstate__(
+ def __getstate__(self):
+ return str(self)
+ def __setstate__(self, state):
+ Node.__init__(self)
+ self[:] = eval(state)
+
+# _unique_id = 0
+# def get_unique_id(cls):
+# Node._unique_id += 1
+# return Node._unique_id
+# get_unique_id = classmethod(get_unique_id)
+
+ def __hash__( self ):
+ return hash( tuple([hash(type(self))]+[hash(item) for item in self]) )
+
+ def clone(self):
+ l = []
+ for item in self:
+ if isinstance(item,Node):
+ item = item.clone()
+ l.append(item)
+ return self.__class__(*l, **self.__dict__)
+
+ def init_from( self, other ): # class method ?
+ # Warning: shallow init
+ self[:] = other
+ self.__dict__.update( other.__dict__ )
+ return self
+
+# def is_struct(self):
+# for x in self:
+# if isinstance(x,Node):
+# if x.is_struct():
+# return 1
+# return 0
+
+
+ #def explain(self):
+ #l = []
+ #for x in self:
+ #if isinstance(x,Node):
+ #l.append(x.explain())
+ #else:
+ #l.append(str(x))
+ #return string.join(l," ")
+ ##(self.__class__.__name__,string.join(l) )
+
+ def psource(self):
+ if hasattr(self,'lines'):
+# print "# "+string.join(self.lines,"\n# ")+"\n"
+ print "# "+"\n# ".join(self.lines)+"\n"
+
+ def cstr(self,l=None):
+ """
+ Build a list of tokens; return the joined tokens string
+ """
+ if l is None:
+ l = []
+ for x in self:
+ if isinstance(x,Node):
+ x.cstr(l)
+ else:
+ l.insert(0,str(x)+' ')
+ s = ''.join(l)
+ return s
+
+ def ctype(self): # anon_clone
+ " return clone of self without identifiers "
+ #print "%s.ctype()"%self
+ l=[]
+ for x in self:
+ if isinstance(x,Node):
+ l.append(x.ctype())
+ else:
+ l.append(x)
+ #print "%s.__class__(*%s)"%(self,l)
+ return self.__class__(*l, **self.__dict__) # XX **self.__dict__ ?
+
+ def cbasetype(self):
+ " return ctype with all TypeAlias's replaced "
+ # WARNING: we cache results (so do not mutate self!!)
+ l=[]
+ for x in self:
+ if isinstance(x,Node):
+ l.append(x.cbasetype())
+ else:
+ l.append(x)
+ #print "%s.__class__(*%s)"%(self,l)
+ return self.__class__(*l, **self.__dict__) # XX **self.__dict__ ?
+
+ def signature( self, tank=None ):
+ """
+ Group every node yielded by self.nodes() by class and by "shape".
+
+ A shape is the tuple of class names of a node's direct children.
+ Returns tank, a dict mapping class -> {shape: [nodes with that shape]};
+ an existing tank may be passed in to accumulate across calls.
+ """
+ if tank is None:
+ tank = {}
+ for node in self.nodes():
+ if not tank.has_key( type(node) ):
+ tank[ type(node) ] = {}
+ # side effect: the per-class dict is also published as a class attribute
+ type(node).tank = tank[type(node)]
+ shape = tuple( [ type(_node).__name__ for _node in node ] )
+ if not tank[type(node)].has_key(shape):
+ tank[type(node)][shape] = []
+ tank[type(node)][shape].append( node )
+ return tank
+
+ def psig( self, tank=None ):
+ if tank is None:
+ tank = {}
+ tank = self.signature(tank)
+ for key in tank.keys():
+ print key.__name__
+ for shape in tank[key].keys():
+ print " ", shape
+
+#
+#################################################
+
+class Named(genpyx.Named, Node):
+ " has a .name property "
+ def get_name(self):
+ if self:
+ assert type(self[0])==str
+ return self[0]
+ return None
+ def set_name(self, name):
+ if self:
+ self[0] = name
+ else:
+ self.append(name)
+ name = property(get_name,set_name)
+
+
+class BasicType(genpyx.BasicType, Named):
+ "float double void char int"
+ pass
+
+class Qualifier(genpyx.Qualifier, Named):
+ "register signed unsigned short long const volatile inline"
+ pass
+
+class StorageClass(genpyx.StorageClass, Named):
+ "extern static auto"
+ pass
+
+class Ellipses(genpyx.Ellipses, Named):
+ "..."
+ pass
+
+class GCCBuiltin(genpyx.GCCBuiltin, BasicType):
+ "things with __builtin prefix"
+ pass
+
+class Identifier(genpyx.Identifier, Named):
+ """
+ shape = +( str, +ConstExpr )
+ """
+ #def explain(self):
+ #if len(self)==1:
+ #return "%s"%self.name
+ #else:
+ #return "%s initialized to %s"%(self.name,
+ #Node(self[1]).explain()) # will handle Initializer
+
+# def ctype(self):
+# return self.__class__(*self[1:]) #.clone() ?
+
+# def get_name(self):
+# if self:
+# return self[0]
+# def set_name(self, name):
+# if self:
+# self[0] = name
+# else:
+# self.append(name)
+# name = property(get_name,set_name)
+
+ def cstr(self,l=None):
+ if l is None:
+ l=[]
+ if len(self)>1:
+ assert len(self)==2
+ l.append( '%s = %s'%(self[0],self[1]) )
+ elif len(self)==1:
+ l.append( str(self[0]) )
+ return " ".join(l)
+
+class TypeAlias(genpyx.TypeAlias, Named):
+ """
+ typedefed things, eg. size_t
+
+ """
+ def cbasetype( self ):
+ node = self.typedef.cbasetype().get_rest()
+ return node
+
+class Function(genpyx.Function, Node):
+ """
+ """
+ #def explain(self):
+ #if len(self):
+ #return "function (%s), returning"%\
+ #", ".join( map(lambda x:x.explain(),self) )
+ #else:
+ #return "function returning"
+
+ def cstr(self,l):
+ #print '%s.cstr(%s)'%(self,l)
+ _l=[]
+ assert len(self)
+ i=0
+ while isinstance(self[i],Declarator):
+ _l.append( self[i].cstr() )
+ i=i+1
+ l.append( '(%s)'% ', '.join(_l) )
+ while i<len(self):
+ self[i].cstr(l)
+ i=i+1
+ return " ".join(l)
+
+ def return_type(self):
+ node = self[-1]
+ #assert isinstance(node,DeclarationSpecifiers)
+ return Declarator( Identifier(), node )
+ ret = property(return_type)
+
+ def get_args(self):
+ args = [ arg for arg in self[:-1] if not arg.is_void() ]
+ return args
+ args = property(get_args)
+
+ def arg_types(self):
+ return [ AbstractDeclarator().init_from( arg.ctype() ) for arg in self[:-1]]
+
+ def is_varargs(self):
+ for node in self.nodes():
+ if isinstance(node,Ellipses) or 'va_list' in node:
+# print self, 'is_varargs'
+ return True
+# print self, 'is_varargs'
+ return False
+# return fn.deepfind(Ellipses) or fn.deepfind('va_list')
+
+ def ctype(self):
+ return Function(*self.arg_types()+[self[-1]]) # XX self[-1].ctype
+
+
+class Pointer(genpyx.Pointer, Node):
+ """
+ """
+ def get_spec(self):
+ if type(self[0])==TypeSpecifiers: # isinstance ??
+ return self[0]
+ spec = property(get_spec)
+
+ #def explain(self):
+ #return "pointer to"
+
+ def cstr(self,l):
+ assert len(self)
+ node=self[0]
+ l.insert(0,'*')
+ if isinstance(node,Function):
+ l.insert(0,'(')
+ l.append(')')
+ elif isinstance(node,Array):
+ l.insert(0,'(')
+ l.append(')')
+ return Node.cstr(self,l)
+
+class Array(genpyx.Array, Node):
+ """
+ """
+ #def explain(self):
+ #s=''
+ #if len(self):
+ #if type(self[0])==int:
+ #s='0 to %s '%(self[0]-1)
+ #return "array %sof"%s
+ def has_size(self):
+ try:
+ int(self.size)
+ return True
+ except:
+ return False
+
+ def get_size(self):
+ if type(self[-1])==str:
+ try: return int(self[-1])
+ except: return self[-1]
+ return self[-1] # None
+ size = property(get_size)
+
+ def get_spec(self):
+ if type(self[0])==TypeSpecifiers: # isinstance ??
+ return self[0]
+ spec = property(get_spec)
+
+ def to_pointer(self):
+ node = Pointer()
+ node.init_from( self.clone() )
+ node.pop() # pop the size element
+ return node
+
+ def cstr(self,l):
+ if self.size is None:
+ l.append('[]')
+ else:
+ l.append('[%s]'%self.size)
+ return Node( *self[:-1] ).cstr( l )
+
+class Tag(genpyx.Tag, Named):
+ " the tag of a Struct, Union or Enum "
+ pass
+
+class Taged(genpyx.Taged, Node):
+ "Struct, Union or Enum "
+ def get_tag(self):
+ if len(self):
+ tag = self[0]
+ assert type(tag)==Tag # isinstance ??
+ else:
+ tag = None
+ return tag
+ def set_tag(self,tag):
+ if len(self):
+ self[0] = tag
+ else:
+ self.append(tag)
+ tag = property( get_tag, set_tag )
+ def has_members(self):
+ return len(self)>1 # more than just a tag
+ def get_members(self):
+ return self[1:]
+ members = property(get_members) # fields ?
+
+ def ctype(self):
+ if not self.tag.name:
+ #print "# WARNING : anonymous struct " # OK i think
+ return self.clone()
+# self = self.clone()
+# return self[:1] # just the tag
+ return self.__class__( self.tag, **self.__dict__ ) # just the Tag
+# return self.__class__( *self, **self.__dict__ )
+
+ def cbasetype(self):
+ return self.ctype() # is this enough ???
+# return Node.cbasetype(self) # XX lookup my tag if i am empty ..?
+
+
+class Compound(genpyx.Compound, Taged):
+ "Struct or Union"
+
+ def cstr(self,_l=None):
+ assert isinstance( self[0], Tag )
+ tag=''
+ if len(self[0]):
+ tag=' '+self[0][0]
+ if isinstance(self,Struct):
+ l=[ 'struct%s '%tag ]
+ elif isinstance(self,Union):
+ l=[ 'union%s '%tag ]
+ if len(self)>1:
+ l.append(' { ')
+ for decl in self[1:]:
+ l.append( decl.cstr()+"; " )
+ l.append('} ')
+ if _l is None:
+ _l=[]
+ while l:
+ _l.insert( 0, l.pop() )
+ # XX empty struct with no tag -> "struct" XX
+ return "".join( _l )
+
+ def ctype(self):
+ tp = Taged.ctype(self)
+ for i in range(1,len(tp)):
+ tp[i] = StructDeclarator().init_from( tp[i] )
+ return tp
+
+class Struct(genpyx.Struct, Compound):
+ """
+ """
+ pass
+
+
+class Union(genpyx.Union, Compound):
+ """
+ """
+ pass
+
+
+class Enum(genpyx.Enum, Taged):
+ """
+ """
+ def cstr(self,_l=None):
+ assert isinstance( self[0], Tag )
+ tag=''
+ if len(self[0]):
+ tag=' '+self[0][0]
+ l=[ 'enum%s '%tag ]
+ if len(self)>1:
+ l.append(' { ')
+ for node in self[1:]:
+ l.append( node.cstr()+', ' )
+ l.append('} ')
+ if _l is None:
+ _l=[]
+ while l:
+ _l.insert( 0, l.pop() )
+ return ''.join( _l )
+
+class Declarator(genpyx.Declarator, Node):
+ """
+ """
+
+ def __eq__(self,other):
+ " unordered equality "
+ # ordering sometimes gets lost when we do a cbasetype
+ if not isinstance(other,Node):
+ return False
+ a, b = self[:], other[:]
+ a.sort()
+ b.sort()
+ return a == b
+
+ def __hash__( self ):
+ hs = [hash(item) for item in self]
+ hs.sort()
+ return hash( tuple([hash(type(self))]+hs) )
+
+ def transform(self):
+ return
+
+ def get_identifier(self):
+ if len(self)>1:
+ return self[0]
+ def set_identifier(self, identifier):
+ if len(self)>1:
+ self[0] = identifier
+ else:
+ self.insert(0,identifier)
+ identifier = property(get_identifier,set_identifier)
+
+ def get_spec(self):
+ spec = self[-1]
+ if type(spec)==TypeSpecifiers: # isinstance ??
+ return spec
+ spec = property(get_spec)
+
+ def get_type_alias(self):
+ if self.spec:
+ if isinstance(self.spec[0], TypeAlias):
+ return self.spec[0]
+ type_alias = property(get_type_alias)
+
+ def get_tagged(self):
+ if self.spec:
+ return self.spec.tagged # i am a tagged
+ tagged = property(get_tagged)
+
+ def get_compound(self):
+ if self.spec:
+ return self.spec.compound # i am a compound
+ compound = property(get_compound)
+
+ def get_struct(self):
+ if self.spec:
+ return self.spec.struct # i am a struct
+ struct = property(get_struct)
+
+ def get_union(self):
+ if self.spec:
+ return self.spec.union # i am a union
+ union = property(get_union)
+
+ def get_enum(self):
+ if self.spec:
+ return self.spec.enum # i am an enum
+ enum = property(get_enum)
+
+ def get_function(self):
+ if len(self)>1 and type(self[1])==Function: # isinstance ??
+ return self[1]
+ function = property(get_function)
+
+ def get_pointer(self):
+ if len(self)>1 and type(self[1])==Pointer: # isinstance ??
+ return self[1]
+ pointer = property(get_pointer)
+
+ def get_array(self):
+ if len(self)>1 and type(self[1])==Array: # isinstance ??
+ return self[1]
+ array = property(get_array)
+
+ def get_name(self):
+ if self.identifier:
+ return self.identifier.name
+ def set_name(self, name):
+ assert self.identifier is not None
+ self.identifier.name = name
+ name = property(get_name, set_name)
+
+ def get_rest(self): # XX needs a better name
+ if len(self)>1:
+ return self[1]
+ return self[0]
+
+ def pointer_to( self ):
+ " return Declarator pointing to self's type "
+ decl = Declarator(Identifier(), Pointer(self.get_rest().clone()))
+ return decl
+
+ def deref( self ):
+ " return (clone of) Declarator that self is pointing to "
+ node = self.ctype() # clone
+ pointer = node.pointer or node.array
+ assert pointer, "cannot dereference non-pointer"
+ node[1:2] = pointer
+ return node
+
+ def is_void(self):
+ return self.spec and BasicType('void') in self.spec
+
+ def is_pointer_to_fn(self):
+ return self.pointer and self.deref().function
+
+ def is_pointer_to_char(self):
+# return self.ctype() == TransUnit("char *a;").transform()[0].ctype()
+ node = self.pointer or self.array
+ if node:
+ spec = node.spec
+ if spec and BasicType('char') in spec and not BasicType('unsigned') in spec:
+ return True
+ return False
+
+ def is_callback(self):
+ " i am a pointer to a function whose last arg is void* "
+ if self.is_pointer_to_fn():
+ fn = self.deref().function
+ if fn.args:
+ arg = fn.args[-1]
+ if arg.pointer and arg.deref().is_void():
+ return True
+
+ def is_complete( self, tag_lookup ):
+ if self.tagged and self.tagged.tag.name in tag_lookup and not tag_lookup[self.tagged.tag.name].has_members():
+ return False
+ return True
+
+ def is_primative( self ):
+ "i am a char,short,int,float,double... "
+ spec = self.cbasetype().spec
+ return spec and spec.find(BasicType)
+
+ def is_pyxnative( self ):
+ # pyrex handles char* too
+ # but i don't know if we should make this the default
+ # sometimes we want to send a NULL, so ... XXX
+ self = self.cbasetype()
+ if self.is_void():
+ return False
+ if self.is_primative():
+ return True
+ if self.enum:
+ return True
+# pointer = None
+# if self.pointer:
+# pointer = self.pointer
+# elif self.array:
+# pointer = self.array
+# if pointer and pointer.spec:
+# spec = pointer.spec
+# if BasicType("char") in spec and not Qualifier("unsigned") in spec:
+# # char*, const char*
+## print self.deepstr()
+# return True
+ return False
+
+ def cstr(self,l=None):
+ return Node.cstr(self,l).strip()
+
+ def ctype(self):
+ decl=Declarator()
+ decl.init_from( self.clone() )
+ decl.identifier = Identifier()
+ for i in range(1,len(decl)):
+ decl[i]=decl[i].ctype()
+ return decl
+
+ def cbasetype(self):
+ """
+ Return a name-free copy of self in which every child has been replaced
+ by its own cbasetype(), with the TypeSpecifiers nodes then flattened.
+
+ The result is cached in a private attribute; later calls return a clone
+ of the cached copy (use invalidate() to drop it).
+ """
+ # WARNING: we cache results (so do not mutate self!!)
+ try:
+ # this cache improves performance by 50%
+ return self.__cbasetype.clone()
+ except AttributeError:
+ pass
+ decl = self.ctype() # gets rid of Identifier names
+ for i, node in enumerate(decl):
+ decl[i] = decl[i].cbasetype()
+# return decl.get_rest()
+
+ # Repeat until no TypeSpecifiers node has another TypeSpecifiers below it.
+ # NOTE(review): expose_node/deepfilter/deepfind are defined elsewhere;
+ # expose_node presumably splices a node's children into its parent - confirm.
+ done = False
+ while not done:
+ done = True
+ nodes = decl.deepfilter( TypeSpecifiers )
+ for node in nodes:
+ if node.deepfind( TypeSpecifiers ) != node:
+ # this node has another TypeSpecifier;
+ decl.expose_node( node )
+ done = False
+ break # start again...
+
+ # each TypeSpecifier needs to absorb primitive siblings (StorageClass, BasicType etc.)
+ nodes = decl.deepfilter( TypeSpecifiers )
+ for node in nodes:
+ parent = decl.get_parent(node)
+ i = 0
+ while i < len(parent):
+ assert not type(parent[i]) in (TypeAlias, Enum, Struct, Union)
+ if type(parent[i]) in (StorageClass, BasicType, Qualifier):
+ # pop() shifts the rest down, so i is only advanced in the else branch
+ node.append( parent.pop(i) )
+ else:
+ i = i + 1
+
+ # keep a private clone so the caller may freely mutate the returned tree
+ self.__cbasetype = decl.clone()
+ return decl
+
+ def invalidate(self):
+ # flush cache, etc.
+ try:
+ del self.__cbasetype
+ except AttributeError:
+ pass
+
+ def declare_str(self,name):
+ " return c string declaring name with same type as self "
+ tp = self.ctype()
+ tp.name = name
+ return tp.cstr()+";"
+
+class Typedef(genpyx.Typedef, Declarator):
+ def cstr(self,l=None):
+ return 'typedef ' + Declarator.cstr(self,l) #.strip()
+
+class AbstractDeclarator(genpyx.AbstractDeclarator, Declarator):
+ """ used in Function; may lack an identifier """
+
+ #def cstr(self,l=None):
+ #return Node.cstr(self,l)
+
+# def ctype(self):
+# # _type_ ignores the name of our identifier
+# return Node.ctype(self)
+
+class FieldLength(genpyx.FieldLength, Node):
+ """
+ """
+ #def explain(self):
+ #return ""
+
+ def cstr(self,l):
+ l.append(':%s'%self[0])
+
+class StructDeclarator(genpyx.StructDeclarator, Declarator): # also used in Union
+ """
+ """
+ #def explain(self):
+ #flen = self.find(FieldLength)
+ #if flen is not None:
+ #i = self.index(flen)
+ #self.pop(i)
+ #s = Declarator.explain(self)
+ #self.insert(i,flen)
+ #width = flen[0]
+ #if width > 0:
+ #return s+" bitfield %s wide"%width
+ #else:
+ #return s+" alignment bitfield"
+ #else:
+ #return Declarator.explain(self)
+# def ctype(self):
+# return self
+ def get_field_length(self):
+ if len(self)>1 and isinstance( self[1], FieldLength ):
+ return self[1]
+ field_length = property(get_field_length)
+
+
+class DeclarationSpecifiers(genpyx.DeclarationSpecifiers, Node):
+#class TypeSpecifiers(Node):
+ """
+ """
+ def __eq__(self,other):
+ " unordered equality "
+ if not isinstance(other,Node):
+ return False
+ a, b = self[:], other[:]
+ a.sort()
+ b.sort()
+ return a == b
+
+ def __hash__( self ):
+ hs = [hash(item) for item in self]
+ hs.sort()
+ return hash( tuple([hash(type(self))]+hs) )
+
+# def is_struct(self):
+# return self.find(Struct) is not None
+
+
+class TypeSpecifiers(genpyx.TypeSpecifiers, DeclarationSpecifiers):
+ """
+ """
+ def get_tagged(self):
+ if self and isinstance(self[0],Taged):
+ return self[0]
+ tagged = property(get_tagged)
+
+ def get_compound(self):
+ if self and isinstance(self[0],Compound):
+ return self[0]
+ compound = property(get_compound)
+
+ def get_struct(self):
+ if self and isinstance(self[0],Struct):
+ return self[0]
+ struct = property(get_struct)
+
+ def get_union(self):
+ if self and isinstance(self[0],Union):
+ return self[0]
+ union = property(get_union)
+
+ def get_enum(self):
+ if self and isinstance(self[0],Enum):
+ return self[0]
+ enum = property(get_enum)
+
+ def cbasetype(self):
+ node = Node.cbasetype(self)
+# node.expose( TypeSpecifiers )
+# if node.deepfind(TypeSpecifiers) != node:
+ return node
+
+class Initializer(genpyx.Initializer, Node):
+ """
+ """
+ pass
+
+
+
+class Declaration(genpyx.Declaration, Node):
+ """
+ A declaration as parsed: a DeclarationSpecifiers node followed by
+ Declarator nodes (both asserted in do_spec).
+ """
+ def do_spec(self):
+ " distribute DeclarationSpecifiers over each Declarator "
+ spec=self[0]
+ assert isinstance(spec,DeclarationSpecifiers), spec.deepstr()
+ # remove the specifiers from the front ...
+ self.pop(0)
+ for declarator in self:
+ assert isinstance(declarator,Declarator)
+ #if isinstance(declarator,DeclarationSpecifiers #huh?
+ ##for node in spec:
+ ##declarator.append(node.clone())
+ # ... and append the very same spec object (not a clone) to every declarator
+ declarator.append(spec)
+
+ def transform(self):
+ # Runs do_spec() on every Declaration yielded by self.nodes(), copies
+ # self.file onto nodes, then calls self.expose(Declaration).
+ # NOTE(review): expose() is defined elsewhere; presumably it replaces nested
+ # Declaration nodes by their children - confirm.
+ # children go first
+ for node in self.nodes():
+ if isinstance(node,Declaration):
+ node.do_spec()
+ node.file = self.file # overkill ?
+ self.expose(Declaration)
+
+ #def explain(self):
+ #return string.join([x.explain() for x in self],", ")
+ #return string.join(map(lambda x:x.explain(),self),", ")
+
+
+class ParameterDeclaration(genpyx.ParameterDeclaration, Declaration):
+ """
+ """
+ pass
+
+
+class StructDeclaration(genpyx.StructDeclaration, Declaration):
+ """
+ """
+ pass
+
+
+class TransUnit(genpyx.TransUnit, Node):
+ """
+ Top level node.
+ """
+ def __init__( self, item ): # XX __init__ uses different signature ! XX
+ if type(item)==str:
+ node = cparse.TransUnit()
+ node.parse(item)
+ else:
+ node = item
+ assert isinstance( node, cparse.TransUnit ), str(node)
+ Node.__init__(self)
+ self[:] = [ self.convert(child) for child in node ]
+ self.__dict__.update( node.__dict__ )
+ assert "name" not in node.__dict__
+
+ self.syms = {} # map identifier names to their Declarator's
+ self.typedefs = {} # map names to Typedef's
+ self.tag_lookup = {} # map struct, union, enum tags to Taged's
+
+ # XX should call transform here XX
+
+# print self.deepstr()
+ def __getstate__( self ):
+ nodes = tuple( [ repr(node) for node in self ] )
+ typedefs = tuple( [ (key,repr(val)) for key,val in self.typedefs.items() ] )
+ return nodes, typedefs
+ def __setstate__( self, state ):
+ Node.__init__(self)
+ nodes, typedefs = state
+ nodes = [ eval(node) for node in nodes ]
+ self[:] = nodes
+ typedefs = [ (key,eval(val)) for key,val in typedefs ]
+ self.typedefs = dict(typedefs)
+
+ def convert( self, node ):
+# name = node.__class__.__name__
+# cls = globals()[ name ]
+ cls = cls_lookup[ type(node) ]
+ _node = cls()
+ for child in node:
+ if isinstance(child, node_module.Node):
+ child = self.convert( child )
+ else:
+ assert child is None or type(child) in (str, int), type(child)
+ _node.append( child )
+ _node.__dict__.update( node.__dict__ )
+ return _node
+
+ def strip(self,files):
+ " leave only the declarations from <files> "
+ i=0
+ while i<len(self):
+ if self[i].file in files:
+ i=i+1
+ else:
+ self.pop(i)
+
+ def mark(self,cb,verbose=False):
+ " mark our child nodes such that cb(node).. mark dependants too. prune unmarked objects. "
+ # mark the nodes:
+ for node in self:
+ node.marked = cb(self, node)
+ if verbose and node.marked:
+ print '1:', node.cstr()
+ # propagate dependancy:
+ i=len(self)
+ while i:
+ i-=1 # we go backwards
+ for node in self[i].nodes(): # bottom-up search
+ if verbose and self[i].marked and not node.marked:
+ print '2:', str(node), '<--', self[i].cstr()
+ node.marked = self[i].marked or node.marked
+ if type(node)==TypeAlias:
+ if verbose and node.marked and not node.typedef.marked:
+ print '3:', node.typedef.cstr(), '<--', node.cstr()
+ node.typedef.marked = node.typedef.marked or node.marked
+ if isinstance(node, Taged):
+ if node.tag.name in self.tag_lookup:
+ _node = self.tag_lookup[ node.tag.name ] # look-up the def'n
+ if verbose and node.marked and not _node.marked:
+ print '4:', _node.cstr(), '<--', self[i].cstr()
+# _node.marked = _node.marked or self[i].marked
+ _node.marked = _node.marked or node.marked
+# else:
+# # this guy has no tag
+# print "lost tag:", self[i].cstr()
+
+ # XX struct defs acquire marks from members, but XX
+ # XX ordinary definitions do not XX
+# if node.marked and not self[i].marked:
+# # one of my descendants is marked
+# if verbose:
+# print '5:', self[i].cstr(), '<--', node.cstr()
+# self[i].marked = True
+# if verbose:
+# for node in self:
+# print '-'*79
+# if node.enum:
+# print str(node.marked) + ': ' + node.cstr()
+ # prune:
+ f = open(".tmp/pruned.txt","w")
+ f.write("// This file autogenerated by '%s' .\n"%__file__)
+ f.write("// List of functions pruned from parse tree, for various reasons.\n\n")
+ i=0
+ while i<len(self):
+ if not self[i].marked:
+ if verbose: print 'pop:', self[i].cstr()
+ f.write( self[i].cstr() + "\n" )
+ self.pop(i)
+# elif self[i].compound:
+# # XXXX for now, rip out all struct members XXXX
+# self[i].compound[1:] = [] # XX encapsulation
+# i = i + 1
+ else:
+ i = i + 1
+ for key, value in self.syms.items():
+ if not value.marked:
+ del self.syms[key]
+ for key, value in self.typedefs.items():
+ if not value.marked:
+ del self.typedefs[key]
+ for key, value in self.tag_lookup.items():
+ if not value.marked:
+ del self.tag_lookup[key]
+# sys.exit(1)
+
+ def assert_no_dups(self):
+ check={}
+ for node in self.nodes():
+ assert not check.has_key(id(node))
+ check[id(node)]=1
+
+ def transform(self, verbose=False, test_parse=False, test_types=False ):
+ """
+ Rewrite the unit in place, replacing each Declaration by its
+ declarators and filling self.typedefs, self.syms and self.tag_lookup.
+
+ verbose -- print progress and tree dumps along the way.
+ test_parse -- re-parse each declarator's cstr() and compare with the
+ original (a mismatch is only reported when verbose).
+ test_types -- build ctype()/declare_str() for each declarator (the
+ results are only printed when verbose).
+ Returns self.
+ """
+ i=0
+ while i < len(self):
+ if verbose: print "##"*25
+ declaration=self[i]
+
+ if verbose: declaration.psource()
+ if verbose: print declaration.deepstr(),'\n'
+ assert isinstance(declaration,Declaration)
+ if verbose: print "# expose declarators from declaration"
+
+ # STAGE 1
+ declaration.transform()
+
+ if verbose: print declaration.deepstr(),'\n'
+ self[i:i+1] = declaration # expose declarators from declaration
+
+ for j in range(len(declaration)):
+ declarator=self[i]
+
+ assert isinstance(declarator,Declarator)
+ if verbose: print "# declarator.transform()"
+
+ # STAGE 2
+ declarator.transform()
+
+ if verbose: print declarator.deepstr(),'\n'
+ if verbose: print "# self.visit_declarator(declarator)"
+
+ # STAGE 3
+ self[i] = declarator = self.visit_declarator(declarator)
+
+ # STAGE 4
+ # record named declarators: typedefs in self.typedefs, the rest in self.syms
+ if declarator.name:
+ if isinstance(declarator, Typedef):
+ if verbose: print "# typedef %s" % declarator.name
+ self.typedefs[ declarator.name ] = declarator
+ else:
+ if verbose: print "# sym %s" % declarator.name
+ self.syms[ declarator.name ] = declarator
+
+ # record tagged nodes; an entry without members is replaced by a later one
+ for node in declarator.nodes():
+ if isinstance(node,Taged) and node.tag.name:
+ assert type(node.tag.name)==str, node.deepstr()
+ taged = self.tag_lookup.get( node.tag.name, None )
+ if taged is None:
+ if verbose: print "# tag lookup %s = %s" % (declarator.name, node.tag.name)
+ self.tag_lookup[ node.tag.name ] = node
+ elif not taged.has_members():
+ # this is (maybe) the definition of this tag
+ if verbose: print "# definition %s = %s" % (declarator.name, node.tag.name)
+ self.tag_lookup[ node.tag.name ] = node
+
+ # Annotate the TypeAlias's
+ # (the lookup raises KeyError when the typedef has not been recorded yet)
+ for node in declarator.deepfilter( TypeAlias ):
+ name = node[0]
+ assert type( name ) == str
+ node.typedef = self.typedefs[ name ]
+
+ if verbose: print declarator.deepstr(),'\n'
+ #print declarator.ctype().deepstr(),'\n'
+ #assert declarator.clone() == declarator
+
+ ###################################################
+ # TESTS:
+ if test_parse:
+ # test that parse of cstr gives same answer
+ cstr = declarator.cstr()+';\n'
+ if verbose: print '# '+cstr.replace('\n','\n# ')
+ #print
+ if isinstance(declarator,Typedef):
+ name = declarator[0][0]
+ assert type(name)==str
+ self.lexer.rmtypedef( name )
+ declaration = cparse.Declaration()
+ self.lexer.lex( cstr )
+ #print self.lexer.err_string()
+ declaration.parse( self.lexer, Symbols() ) # use new name-space
+ #declaration.parse( Lexer( cstr ), Symbols() )
+ declaration = self.convert(declaration)
+ declaration.transform()
+ assert len(declaration)==1
+ decl=declaration[0]
+ decl.transform()
+ decl = self.visit_declarator(decl)
+ if decl!=declarator:
+ if verbose: print "#???????????"
+ if verbose: print decl.deepstr(),'\n\n'
+ #if verbose: print declaration.deepstr(),'\n\n'
+ #assert 0
+ elif verbose: print '# OK\n'
+
+ if test_types:
+ node = declarator.ctype()
+ declare_str= node.declare_str("my_name")
+ if verbose: print "# declarator.ctype() "
+ if verbose: print node.deepstr(),"\n"
+ if verbose: print "#",declare_str.replace('\n','\n# '), '\n'
+
+ i=i+1
+ return self
+
+ def visit(self,node):
+ #print 'visit(%s)'%node
+ for _node in node:
+ if isinstance(_node,Declarator):
+ _node = self.visit_declarator(_node) # XX replace _node
+ elif isinstance(_node,Node):
+ _node = self.visit(_node) # XX replace _node
+ return node
+
+ def visit_declarator(self,decl):
+ """
+ Restructure a declarator's flat child list into a nested one and
+ return the resulting declarator, which is the Typedef node found
+ inside decl when there is one, and decl itself otherwise.
+ """
+ assert isinstance(decl,Declarator)
+
+ # STAGE 3.a
+ # a Typedef found inside takes over decl's children and attributes and
+ # becomes the declarator worked on below
+ tp = decl.deepfind(Typedef)
+ if tp is not None:
+ decl.deeprm(tp)
+ tp.init_from( decl ) # warning: shallow init
+ decl = tp
+
+ # STAGE 3.b
+ # children are popped from last to first; a Pointer, Function or Array
+ # swallows everything accumulated so far and becomes the only accumulated node
+ i=len(decl)
+ # accumulate nodes (they become the children of decl)
+ children=[]
+ while i:
+ i=i-1
+ node=decl.pop(i)
+ if isinstance(node,Declarator):
+ node = self.visit_declarator(node) # replace node
+ else:
+ node = self.visit(node) # replace node
+ if isinstance(node,Pointer):
+ node+=children
+ children=[node]
+ elif isinstance(node,Function):
+ node+=children
+ children=[node]
+ elif isinstance(node,Array):
+ # accumulated nodes go in front of the array's own children
+ while children:
+ node.insert(0,children.pop())
+ children=[node]
+ # array size (if any) at end
+ #elif isinstance(node,Identifier):
+ #node+=children
+ #children=[node]
+ else:
+ # accumulate
+ children.insert(0,node)
+ decl[:]=children
+ return decl
+
+ cstr = None
+ ctype = None
+ cbasetype = None
+
+
+# remap the global class definitions in genpyx to
+# point to the definitions in this module
+gbl = globals()
+for key, val in gbl.items():
+ if type(val)==type:
+ if issubclass(val,Node):
+ setattr( genpyx, key, val )
+assert genpyx.Node == Node
+
+cls_lookup = {
+# Node : Node ,
+ cparse.BasicType : BasicType ,
+ cparse.Qualifier : Qualifier ,
+ cparse.StorageClass : StorageClass ,
+ cparse.Ellipses : Ellipses ,
+ cparse.GCCBuiltin : GCCBuiltin ,
+ cparse.Identifier : Identifier ,
+ cparse.TypeAlias : TypeAlias ,
+ cparse.Function : Function ,
+ cparse.Pointer : Pointer ,
+ cparse.Array : Array ,
+ cparse.Tag : Tag ,
+ cparse.Compound : Compound ,
+ cparse.Struct : Struct ,
+ cparse.Union : Union ,
+ cparse.Enum : Enum ,
+ cparse.Declarator : Declarator ,
+ cparse.Typedef : Typedef ,
+ cparse.AbstractDeclarator : AbstractDeclarator ,
+ cparse.FieldLength : FieldLength ,
+ cparse.StructDeclarator : StructDeclarator ,
+ cparse.DeclarationSpecifiers : TypeSpecifiers ,
+ cparse.TypeSpecifiers : TypeSpecifiers ,
+ cparse.Initializer : Initializer ,
+ cparse.Declaration : Declaration ,
+ cparse.ParameterDeclaration : ParameterDeclaration ,
+ cparse.StructDeclaration : StructDeclaration ,
+ cparse.TransUnit : TransUnit ,
+}
+
+
diff --git a/tools/python-yasm/pyxelator/lexer.py b/tools/python-yasm/pyxelator/lexer.py
new file mode 100755
index 0000000..c161219
--- /dev/null
+++ b/tools/python-yasm/pyxelator/lexer.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python
+""" cdecl.py - parse c declarations
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+import sys
+import string
+import types
+import copy
+
+#from cparse import BasicType, Qualifier, StorageClass, Typedef, Ellipses, GCCBuiltin
+#from cparse import *
+
+import cparse as host
+
+class LexError(Exception):
+    " raised by the Lexer when the input cannot be tokenized "
+
+class Lexer(object):
+    """ Line-oriented tokenizer for preprocessed C source.
+
+    The current token is held as the state tuple (tok, kind, lno, col):
+    tok is the token text ("" once the input is exhausted), kind is a
+    node from self.lookup for keywords/typedef names, None for any other
+    identifier, a number for numeric literals and the token text itself
+    for punctuation and character constants; lno/col are the 0-based
+    position just past the token.  One previous state is remembered so
+    that a single token can be pushed back with unget_token().
+    """
+
+    def __init__(self,s="",verbose=0,**kw):
+        """ Build the keyword table; if s is non-empty start lexing it.
+        Extra keyword arguments are stored as instance attributes (after
+        lexing has started). """
+        self.verbose = verbose
+        self.lookup = {} # a map for keywords and typedefs
+        for t in "float double void char int".split():
+            self.lookup[t] = host.BasicType( t )
+        for t in \
+            "register signed unsigned short long const volatile inline".split(): # inline here ???
+            self.lookup[t] = host.Qualifier( t )
+        for t in "extern static auto".split():
+            self.lookup[t] = host.StorageClass( t )
+        self.lookup['typedef'] = host.Typedef()
+        self.lookup['...'] = host.Ellipses()
+        if s:
+            self.lex(s)
+        for key in kw.keys():
+            self.__dict__[key] = kw[key]
+
+    def lex(self,s):
+        " Reset all state, load the text s and read the first token. "
+        self.stack = None
+        self.lines = s.splitlines()
+        self.set_state("","",0,0)
+        self.so_file = ""
+        self._newline()
+        self.get_token() # start
+
+    def mktypedef(self,tok,node):
+        " Register tok so that it is subsequently lexed with kind node. "
+        if self.verbose:
+            print "%s.mktypedef(%s,%s)"%(self,tok,node)
+        self.lookup[ tok ] = node
+
+    def rmtypedef(self,tok):
+        " used in round trip testing "
+        assert isinstance( self.lookup[ tok ], host.Node ) # existence
+        del self.lookup[ tok ]
+
+    def _get_kind(self,tok):
+        """ Return the lookup entry for tok, or None for a plain
+        identifier.  Names starting with "__builtin" get a GCCBuiltin
+        node which is cached in the lookup table. """
+        try:
+            return self.lookup[tok]
+        except KeyError:
+            if tok.startswith("__builtin"):
+                node = host.GCCBuiltin(tok)
+                self.lookup[tok] = node
+                return node
+            return None
+
+    def _newline(self):
+        """ Skip lines starting with '#', remembering the most recent
+        double-quoted file name seen on such a line in self.so_file. """
+        while self.lno < len(self.lines):
+            line = self.lines[self.lno]
+            if not line or line[0] != "#":
+                break
+            l = line.split('"')
+            # '#' lines without a quoted name (e.g. "#pragma pack(1)")
+            # are skipped without touching so_file instead of asserting
+            if len(l)>=2:
+                self.so_file = l[1]
+            self.lno+=1
+
+    def get_brace_token( self ):
+        """ Advance to the next '{' or '}', ignoring everything else.
+        Leaves tok == "" when the input runs out first. """
+        self.push_state()
+        tok, kind = "", ""
+        while self.lno < len(self.lines):
+            s = self.lines[self.lno]
+            i = self.col
+            while i < len(s) and s[i] not in '{}':
+                i = i+1
+            if i < len(s):
+                tok = kind = s[i]
+                self.col = i+1
+                break
+            # no brace on the rest of this line
+            self.col = 0
+            self.lno += 1
+            self._newline()
+        self.set_state(tok,kind,self.lno,self.col)
+
+    def get_token(self):
+        """ Read the next token into the current state (see class doc).
+        Characters that start no known token are reported on stderr and
+        skipped.  Raises LexError on an unterminated character constant. """
+        self.push_state()
+        tok, kind = "", ""
+        while self.lno < len(self.lines):
+            s = self.lines[self.lno]
+            i = self.col
+            while i < len(s):
+                c = s[i]
+                if c.isspace():
+                    i = i+1
+                    continue
+                if c.isalpha() or c=='_':
+                    # identifier
+                    j = i+1
+                    while j<len(s) and (s[j].isalnum() or s[j]=='_'):
+                        j = j+1
+                    tok = s[i:j]
+                    self.col = j
+                    kind = self._get_kind(tok)
+                    break
+                if c.isdigit() or \
+                    (i+1<len(s) and c in '+-.' and s[i+1].isdigit()):
+                    # number literal
+                    is_float = c=='.'
+                    is_hex = s[i:i+2] in ('0x','0X')
+                    if is_hex:
+                        # the prefix is dropped from tok; int(tok,16) below
+                        i = i+2
+                        assert i<len(s) and \
+                            (s[i].isdigit() or s[i] in "abcdefABCDEF"), self.err_string()
+                    j = i+1
+                    while j<len(s):
+                        if s[j].isdigit() or (is_hex and s[j] in "abcdefABCDEF"):
+                            j = j+1
+                        elif s[j]=='.' and not is_float:
+                            assert not is_hex
+                            j = j+1
+                            is_float = 1
+                        else:
+                            break
+                    tok = s[i:j]
+                    self.col = j
+                    if is_float:
+                        kind = float(tok)
+                    elif is_hex:
+                        kind = int(tok,16)
+                    else:
+                        kind = int(tok)
+                    break
+                if s[i:i+3]=='...':
+                    # ellipses
+                    tok = s[i:i+3]
+                    kind = self._get_kind(tok)
+                    self.col = i+3
+                    break
+                if c in '*/{}()[]:;,=+-~.<>|&':
+                    tok = c
+                    kind = tok
+                    self.col = i+1
+                    break
+                if c == "'":
+                    # character constant: skip the first body character
+                    # (two when it is a backslash escape, so that '\''
+                    # is not cut short) and scan to the closing quote
+                    j = i+2
+                    if s[i+1:i+2] == '\\':
+                        j = i+3
+                    while j<len(s) and s[j]!="'":
+                        j += 1
+                    if j>=len(s):
+                        # '>=': j starts past the end when the opening
+                        # quote is the last character of the line
+                        raise LexError( self.err_string() + "unterminated char constant" )
+                    tok = s[i:j+1]
+                    self.col = j+1
+                    kind = tok
+                    break
+                # keep moving
+                sys.stderr.write( "lexer ignoring '%s' lno=%d\n"%(c,self.lno+1) )
+                i = i+1
+            if tok:
+                # we got one
+                break
+            # nothing found, go to next line
+            self.col = 0
+            self.lno += 1
+            self._newline()
+        self.set_state(tok,kind,self.lno,self.col)
+
+    def err_string(self):
+        "Return helpful error string :)"
+        if self.lno < len(self.lines):
+            line = self.lines[self.lno]
+        else:
+            # the position is past the last line once input is exhausted
+            line = "<end of input>"
+        return line+"\n"+" "*self.col+"^\n"
+
+    def push_state(self):
+        self.stack = self.get_state() # a short stack :)
+
+    def unget_token(self):
+        " Restore the state saved by the last get_token/get_brace_token. "
+        assert self.stack is not None
+        self.set_state(*self.stack)
+        self.stack = None
+
+    def set_state(self,tok,kind,lno,col):
+        if self.verbose:
+            print "tok,kind,lno,col = ",(tok,kind,lno,col)
+        self.tok = tok
+        self.kind = kind
+        self.lno = lno # line
+        self.col = col # column
+
+    def get_state(self):
+        return self.tok,self.kind,self.lno,self.col
+
+    def get_file(self):
+        return self.so_file
+
+###################################################################
+#
+###################################################################
+#
+
+
diff --git a/tools/python-yasm/pyxelator/node.py b/tools/python-yasm/pyxelator/node.py
new file mode 100755
index 0000000..5ce9043
--- /dev/null
+++ b/tools/python-yasm/pyxelator/node.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+""" cdecl.py - parse c declarations
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+import string
+
+
+class Node(list):
+    """ A node in a parse tree.
+
+    A Node is a list whose items are either child Nodes or plain leaf
+    values.  Keyword arguments to the constructor become instance
+    attributes.  Equality compares items only, never attributes.
+    """
+
+    def __init__(self,*items,**kw):
+        list.__init__( self, items )
+        self.lock1 = 0 # these two should be properties (simplifies serializing)
+        self.lock2 = 0
+        self.verbose = 0
+        for key in kw.keys():
+            self.__dict__[key] = kw[key]
+
+    def __str__(self):
+        " ClassName(item,...) -- items only, attributes are not shown "
+        attrs = []
+        for item in self:
+            if isinstance(item,Node):
+                attrs.append( str(item) )
+            else:
+                attrs.append( repr(item) )
+        return "%s(%s)"%(self.__class__.__name__,','.join(attrs))
+
+    def safe_repr( self, tank ):
+        """ repr including attributes.  tank collects str() of every node
+        already visited so that Node-valued attributes leading back to
+        a visited node are omitted instead of recursing forever. """
+        tank[ str(self) ] = None
+        attrs = []
+        for item in self:
+            if isinstance(item,Node):
+                attrs.append( item.safe_repr(tank) ) # can we use repr here ?
+            else:
+                attrs.append( repr(item) )
+        # this is the dangerous bit:
+        for key, val in self.__dict__.items():
+            if isinstance(val,Node):
+                if str(val) not in tank:
+                    attrs.append( '%s=%s'%(key,val.safe_repr(tank)) )
+            else:
+                attrs.append( '%s=%s'%(key,repr(val)) )
+        return "%s(%s)"%(self.__class__.__name__,','.join(attrs))
+
+    def __repr__(self):
+        return self.safe_repr({})
+
+    def __eq__(self,other):
+        " item-wise equality with another Node (returns 0 or 1) "
+        if not isinstance(other,Node):
+            return 0
+        if len(self)!=len(other):
+            return 0
+        for i in range(len(self)):
+            if not self[i]==other[i]:
+                return 0
+        return 1
+
+    def __ne__(self,other):
+        return not self==other
+
+    def filter(self,cls):
+        " direct children that are instances of cls "
+        return [x for x in self if isinstance(x,cls)]
+
+    def deepfilter(self,cls):
+        " bottom-up "
+        return [x for x in self.nodes() if isinstance(x,cls)]
+
+    def find(self,cls):
+        " first direct child that is an instance of cls, else None "
+        for x in self:
+            if isinstance(x,cls):
+                return x
+        return None
+
+    def deepfind(self,cls):
+        """ isinstance search over the subtree: each child Node is tested
+        before its own descendants are searched, and self is tested
+        last.  Returns None when nothing matches. """
+        for x in self:
+            if isinstance(x,Node):
+                if isinstance(x,cls):
+                    return x
+                node = x.deepfind(cls)
+                if node is not None:
+                    return node
+        if isinstance(self,cls):
+            return self
+        return None
+
+    def leaves(self):
+        " generate every non-Node item in the subtree, left to right "
+        for i in self:
+            if isinstance( i, Node ):
+                for j in i.leaves():
+                    yield j
+            else:
+                yield i
+
+    def nodes(self):
+        " bottom-up iteration "
+        for i in self:
+            if isinstance( i, Node ):
+                for j in i.nodes():
+                    yield j
+        yield self
+
+    def deeplen(self):
+        """ number of items in the whole subtree; a node that is reached
+        again while it is still being counted (lock2) counts as 1 """
+        i=0
+        if not self.lock2:
+            self.lock2=1
+            for item in self:
+                i+=1
+                if isinstance(item,Node):
+                    i+=item.deeplen()
+            self.lock2=0
+        else:
+            i+=1
+        return i
+
+    def deepstr(self,level=0,comment=False,nl='\n',indent=' '):
+        """ multi-line rendering of the subtree, one item per line,
+        indented by depth.  Subtrees with deeplen() < 4 are rendered on
+        one line.  With comment=True every line is prefixed with '#'. """
+        if self.deeplen() < 4:
+            nl = ""; indent = ""
+        pad = indent*(level+1)
+        s = []
+        if not self.lock1:
+            self.lock1=1
+            for item in self:
+                if isinstance(item,Node):
+                    s.append( pad+item.deepstr(level+1,False,nl,indent) )
+                else:
+                    s.append( pad+repr(item) )
+            self.lock1=0
+        else:
+            # re-entered while already being rendered: do not descend
+            for item in self:
+                if isinstance(item,Node):
+                    s.append( pad+"<recursion...>" )
+                else:
+                    s.append( pad+"%s"%repr(item) )
+        s = "%s(%s)"%(self.__class__.__name__,nl+(","+nl).join(s))
+        if comment:
+            s = '#' + s.replace('\n','\n#')
+        return s
+
+    def clone(self):
+        " recursive copy of the items "
+        items = []
+        for item in self:
+            if isinstance(item,Node):
+                item = item.clone()
+            items.append(item)
+        # we skip any attributes...
+        return self.__class__(*items)
+
+    def fastclone(self):
+        """ non-recursive copy using explicit stacks; unlike clone() the
+        instance attributes of every node are copied too """
+        # XX is it faster ???
+        nodes = [self]
+        idxs = [0]
+        itemss = [ [] ]
+        while nodes:
+            assert len(nodes)==len(idxs)==len(itemss)
+            node = nodes[-1]
+            items = itemss[-1]
+            assert idxs[-1] == len(items)
+            while idxs[-1]==len(node):
+                # pop: all children of node are copied, build its copy
+                _node = node.__class__( *items )
+                _node.__dict__.update( node.__dict__ )
+                nodes.pop(-1)
+                idxs.pop(-1)
+                itemss.pop(-1)
+                if not nodes:
+                    return _node # Done !!
+                node = nodes[-1]
+                items = itemss[-1]
+                items.append(_node) # set
+                idxs[-1] += 1
+                assert idxs[-1] == len(items)
+
+            _node = node[ idxs[-1] ]
+            if isinstance(_node,Node):
+                # push
+                nodes.append( _node )
+                idxs.append( 0 )
+                itemss.append( [] )
+            else:
+                # next
+                items.append(_node)
+                idxs[-1] += 1
+                assert idxs[-1] == len(items)
+
+    def expose(self,cls):
+        ' expose children of any <cls> instance '
+        # children first
+        for x in self:
+            if isinstance(x,Node):
+                x.expose(cls)
+        # now the tricky bit
+        i=0
+        while i < len(self):
+            if isinstance(self[i],cls):
+                node=self.pop(i)
+                for x in node:
+                    assert not isinstance(x,cls)
+                    # pass on some attributes; leaf values such as str
+                    # or int cannot carry attributes, so only Nodes do
+                    if isinstance(x,Node):
+                        if hasattr(node,'lines') and not hasattr(x,'lines'):
+                            x.lines=node.lines
+                        if hasattr(node,'file') and not hasattr(x,'file'):
+                            x.file=node.file
+                    self.insert(i,x) # expose
+                    i=i+1
+                    assert i<=len(self)
+            else:
+                i=i+1
+
+    def get_parent( self, item ): # XX 25% CPU time here XX
+        """ the node in this subtree that directly contains item
+        (compared with ==), or None """
+        assert self != item
+        if item in self:
+            return self
+        for child in self:
+            if isinstance(child, Node):
+                parent = child.get_parent(item)
+                if parent is not None:
+                    return parent
+        return None
+
+    def expose_node( self, item ):
+        " replace item, inside its parent, by item's own children "
+        assert self != item
+        parent = self.get_parent(item)
+        idx = parent.index( item )
+        parent[idx:idx+1] = item[:]
+
+    def delete(self,cls):
+        ' delete any <cls> subtree '
+        for x in self:
+            if isinstance(x,Node):
+                x.delete(cls)
+        # now the tricky bit
+        i=0
+        while i < len(self):
+            if isinstance(self[i],cls):
+                self.pop(i)
+            else:
+                i=i+1
+
+    def deeprm(self,item):
+        ' remove any items matching <item> '
+        for x in self:
+            if isinstance(x,Node):
+                x.deeprm(item)
+        # now the tricky bit
+        i=0
+        while i < len(self):
+            if self[i] == item:
+                self.pop(i)
+            else:
+                i=i+1
+
+    def idem(self,cls):
+        " <cls> is made idempotent "
+        # children first
+        for x in self:
+            if isinstance(x,Node):
+                x.idem(cls)
+        if isinstance(self,cls):
+            # now the tricky bit: a cls directly inside a cls is
+            # replaced by its own children
+            i=0
+            while i < len(self):
+                if isinstance(self[i],cls):
+                    node = self.pop(i)
+                    for x in node:
+                        assert not isinstance(x,cls)
+                        self.insert(i,x) # idempotent
+                        i=i+1
+                        assert i<=len(self)
+                else:
+                    i=i+1
+
+if __name__=="__main__":
+ node = Node( 'a', Node(1,2), Node(Node(Node(),1)) )
+
+ print node
+ print node.clone()
+
+
+
+
diff --git a/tools/python-yasm/pyxelator/parse_core.py b/tools/python-yasm/pyxelator/parse_core.py
new file mode 100755
index 0000000..84fb894
--- /dev/null
+++ b/tools/python-yasm/pyxelator/parse_core.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+""" cdecl.py - parse c declarations
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+import sys
+
+
+class Symbols(object):
+    """ One namespace level: a table of identifiers plus a separate
+    table of struct/union/enum tags, optionally chained to a parent. """
+
+    def __init__(self,parent=None,verbose=False):
+        self.verbose = verbose
+        self.parent = parent # enclosing namespace, if we are nested
+        self.lookup = {} # identifiers
+        self.tags = {} # struct, union, enum tags
+
+    def __str__(self):
+        return "Symbols(%s,%s)"%(self.lookup,self.tags)
+
+    def __getitem__(self,key):
+        " identifier lookup in this level only; None when unknown "
+        return self.lookup.get(key)
+
+    def __setitem__(self,key,val):
+        assert val is not None
+        self.lookup[key] = val
+
+    def set_tag(self,key,val):
+        assert len(key)
+        self.tags[key] = val
+
+    def deep_get_tag(self,key):
+        " tag lookup that falls back on the chain of parents "
+        if key in self.tags:
+            return self.tags[key]
+        if self.parent is None:
+            return None
+        return self.parent.deep_get_tag(key)
+
+    def get_tag(self,key):
+        " tag lookup in this level only; None when unknown "
+        return self.tags.get(key)
+
+###################################################################
+#
+###################################################################
+#
+
+
+class ParseError(Exception):
+    " carries arbitrary parts; str() is their concatenation "
+    def __init__(self,*e):
+        self.e = e
+
+    def __str__(self):
+        return "".join([str(part) for part in self.e])
+
+
+class Parser(object):
+    """ Error reporting and bookkeeping shared by the parsing classes.
+    parse_error() calls self.deepstr(), which this class does not
+    define itself. """
+
+    def parse_error(self,lexer,reason="?",*blah):
+        " report the failure on stderr, then raise ParseError "
+        report = sys.stderr.write
+        report( "%s.parse_error()\n"%self.deepstr() )
+        report( "at line %s: %s\n"%(lexer.lno+1,reason) )
+        report( lexer.err_string() )
+        raise ParseError(reason,*blah)
+
+    def expected_error(self,lexer,*l):
+        wanted = " or ".join(map(repr,l))
+        self.parse_error( lexer, "expected %s, got '%s'"%(wanted,lexer.tok) )
+
+    def consume(self,lexer,tok):
+        " require the current token to be tok, then advance "
+        if lexer.tok != tok:
+            self.expected_error(lexer, tok)
+        lexer.get_token()
+
+    def parse_enter(self,lexer):
+        # remember where this construct starts
+        self.start_lno=lexer.lno
+        self.file=lexer.so_file
+
+    def parse_leave(self,lexer):
+        # source lines spanned since parse_enter; always at least one
+        last = max(lexer.lno,self.start_lno+1)
+        self.lines = lexer.lines[self.start_lno:last]
+
+###################################################################
+#
+###################################################################
+#
+
diff --git a/tools/python-yasm/pyxelator/work_unit.py b/tools/python-yasm/pyxelator/work_unit.py
new file mode 100755
index 0000000..31ab3e5
--- /dev/null
+++ b/tools/python-yasm/pyxelator/work_unit.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+"""
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+
+import sys
+import os
+
+import cparse
+import ir
+
+def callcmd(cmd):
+    """ Run cmd through the shell; raise AssertionError when it cannot
+    be started or exits non-zero.  Falls back on os.system when the
+    subprocess module cannot be imported. """
+    try:
+        from subprocess import call
+    except ImportError:
+        call = None
+    if call is None:
+        status = os.system( cmd )
+        assert status == 0, "command failed: %s"%cmd
+        return
+    try:
+        retcode = call(cmd, shell=True)
+    except OSError, e:
+        assert False, "command failed: %s"%e
+    assert retcode == 0, "command failed: %s"%cmd
+
+class WorkUnit(object):
+    """ Drives one wrapping job: preprocess a set of headers, parse the
+    result with cparse, transform it with ir and write the generated
+    declarations to a .pxi file.
+
+    files      -- header names to #include
+    modname    -- module name passed on to pyx_decls
+    filename   -- output name; must end in ".pyx", the text is written
+                  to the sibling ".pxi"
+    std        -- include with <...> instead of "..."
+    strip      -- call node.strip(files) after parsing
+    mark_cb    -- callback handed to node.mark() during transform
+    extradefs  -- text appended to the preprocessed source before parsing
+    use_header -- basename of existing .E/.dM files; skips preprocessing
+    CC, CPP, CPPFLAGS -- compiler settings; CC == 'g++' adds -D__cplusplus
+    """
+
+    def __init__(self, files, modname, filename,
+            std=False, strip=False, mark_cb=None,
+            extradefs="", use_header=None, CC="gcc", CPP="gcc -E",
+            CPPFLAGS=""):
+        self.files = tuple(files)
+        self.modname = modname
+        self.filename = filename
+        self.CPPFLAGS = CPPFLAGS
+        self.CPP = CPP
+        if CC == 'g++':
+            self.CPPFLAGS += " -D__cplusplus"
+        self.std = std
+        self.strip = strip
+        self.mark_cb = mark_cb
+        self.node = None
+        self.extradefs = extradefs
+        self.CC = CC
+        self.use_header = use_header
+
+    def _count_newlines( self, path ):
+        " number of newline characters in the file at path "
+        f = open( path )
+        try:
+            return f.read().count('\n')
+        finally:
+            f.close()
+
+    def mkheader( self ):
+        """ Write .tmp/<hash>.h including all files, run the preprocessor
+        on it twice (plain and with -dM) and return the path without
+        extension.  Returns use_header unchanged when that is set. """
+        if self.use_header:
+            return self.use_header
+        tmpname = str(abs(hash( (self.files,self.CPPFLAGS) )))
+        # the scratch directory is not guaranteed to exist yet
+        if not os.path.isdir('.tmp'):
+            os.makedirs('.tmp')
+        name = '.tmp/%s' % tmpname
+        ifile = open( name+'.h', "w" )
+        try:
+            # neutralize compiler extensions before the headers are seen
+            ifile.write( """
+#define __attribute__(...)
+#define __const const
+#define __restrict
+#define __extension__
+#define __asm__(...)
+#define __asm(...)
+#define __inline__
+#define __inline
+""" )
+            for filename in self.files:
+                if self.std:
+                    line = '#include <%s>\n'%filename
+                else:
+                    line = '#include "%s"\n'%filename
+                ifile.write( line )
+                print line,
+        finally:
+            ifile.close()
+        cmd = '%s %s %s > %s'%(self.CPP,name+'.h',self.CPPFLAGS,name+'.E')
+        sys.stderr.write( "# %s\n" % cmd )
+        callcmd( cmd )
+        assert self._count_newlines(name+'.E') > 10, "failed to run preprocessor"
+        cmd = '%s -dM %s %s > %s'%(self.CPP,name+'.h',self.CPPFLAGS,name+'.dM')
+        sys.stderr.write( "# %s\n" % cmd )
+        callcmd( cmd )
+        assert self._count_newlines(name+'.dM') > 10, "failed to run preprocessor with -dM"
+        return name
+
+    def parse(self, verbose=False):
+        """ Collect the macro names from the .dM file into self.macros
+        and parse the .E file (plus extradefs) into self.node. """
+        sys.stderr.write( "# parse %s\n" % str(self.files) )
+        name = self.mkheader()
+        # read macros
+        macros = {}
+        f = open(name+'.dM')
+        try:
+            for line in f:
+                fields = line.split()
+                if len(fields) < 2:
+                    # blank or truncated line: no macro name on it
+                    continue
+                macro = fields[1]
+                if macro.count('('):
+                    # function-like macro: keep the bare name
+                    macro = macro[:macro.index('(')]
+                macros[macro] = None
+        finally:
+            f.close()
+        self.macros = macros
+        # parse preprocessed code
+        f = open(name+'.E')
+        try:
+            s = f.read() + self.extradefs
+        finally:
+            f.close()
+        self.node = cparse.TransUnit(verbose = verbose)
+        sys.stderr.write( "# parsing %s lines\n" % s.count('\n') )
+        self.node.parse( s )
+        if self.strip:
+            self.node.strip(self.files)
+
+    def transform(self, verbose=False, test_parse=False, test_types=False):
+        " Convert the parsed tree to an ir.TransUnit and transform it. "
+        sys.stderr.write( "# processing...\n" )
+        self.node = ir.TransUnit( self.node )
+        self.node.transform(verbose, test_parse, test_types)
+        if self.mark_cb is not None:
+            self.node.mark(self.mark_cb,verbose=False)
+
+    def output( self, func_cb = None ):
+        " Generate the declarations and write them to <filename>.pxi "
+        sys.stderr.write( "# pyxstr...\n" )
+        decls = self.node.pyx_decls(self.files, self.modname, macros = self.macros, func_cb = func_cb, names={}, cprefix="" )
+
+        name = self.filename
+        assert name.endswith(".pyx")
+
+        pxi = name[:-3]+'pxi'
+        ofile = open( pxi, "w" )
+        try:
+            ofile.write(decls)
+        finally:
+            # close explicitly so the text is on disk before we report it
+            ofile.close()
+        sys.stderr.write( "# wrote %s, %d lines\n" % (pxi,decls.count('\n')) )
+
+    def pprint(self):
+        " Print deepstr() of every top-level declaration. "
+        for decl in self.node:
+            print
+            print decl.deepstr()
+
+def file_exists(path):
+    " True when os.stat succeeds on path "
+    return os.path.exists(path)
+
+# file name extension of shared libraries on this platform
+shared_ext = '.so'
+if 'darwin' in sys.platform:
+    shared_ext = '.dylib'
+
+def get_syms(libs, libdirs):
+ # Return a dict whose keys are the symbol names that `nm` reports for
+ # the libraries found; every value is None.
+ #   libs    -- library base names; 'lib'+name+ext is searched for
+ #   libdirs -- directories to search
+ # NOTE(review): the indentation of this function was lost in this
+ # copy; the nesting of the `break` statements and of the '.eh'
+ # stripping must be confirmed against the pristine file.
+ # XX write interface to objdump -t XX
+ libnames = []
+ for lib in libs:
+ # try the platform's shared extension, then the static archive
+ for ext in shared_ext,'.a':
+ libname = 'lib'+lib+ext
+ for libdir in libdirs:
+ path = libdir+'/'+libname
+ if file_exists(path):
+ libnames.append(path)
+ break
+ #else:
+ #print "cannot find %s lib as %s in %s" % ( lib, libname, libdir )
+ print 'libnames:', libnames
+ syms = {}
+ # an nm output line is accepted when it contains one of these
+ # space-delimited single-letter type codes
+ accept = [ ' %s '%c for c in 'TVWBCDGRS' ]
+ #f = open('syms.out','w')
+ for libname in libnames:
+ try:
+ from subprocess import Popen, PIPE
+ p = Popen(['nm', libname], bufsize=1, stdout=PIPE)
+ fout = p.stdout
+ except ImportError:
+ # no subprocess module available: use os.popen2 instead
+ fin, fout = os.popen2( 'nm %s' % libname )
+ for line in fout.readlines():
+ for acc in accept:
+ if line.count(acc):
+ # the symbol name is whatever follows the type code
+ left, right = line.split(acc)
+ sym = right.strip()
+ if sys.platform.count('darwin'):
+ if sym[0] == '_':
+ sym = sym[1:] # remove underscore prefix
+ if sym.endswith('.eh'):
+ sym = sym[:-len('.eh')]
+ syms[sym] = None
+ #f.write( '%s: %s %s\n' % (sym,line[:-1],libname) )
+ break
+ return syms
+
+
+
diff --git a/tools/python-yasm/pyxelator/wrap_yasm.py b/tools/python-yasm/pyxelator/wrap_yasm.py
new file mode 100755
index 0000000..58553ab
--- /dev/null
+++ b/tools/python-yasm/pyxelator/wrap_yasm.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+"""
+
+(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
+Released under GNU LGPL license.
+
+version 0.xx
+
+"""
+
+
+import sys
+import os
+
+from work_unit import WorkUnit, get_syms
+import ir
+
+
+def mk_tao(CPPFLAGS = "", CPP = "gcc -E", modname = '_yasm', oname = None, YASM_DIR = ".", **options):
+    """ Generate the declarations for the libyasm headers: parse,
+    transform and write them through a WorkUnit.  The output name
+    defaults to modname+'.pyx'; remaining options go to WorkUnit. """
+    if oname is None:
+        oname = modname+'.pyx'
+    defines = ( "YASM_PYXELATOR", "YASM_LIB_INTERNAL",
+        "YASM_BC_INTERNAL", "YASM_EXPR_INTERNAL" )
+    CPPFLAGS += " -I"+YASM_DIR + "".join([ " -D"+d for d in defines ])
+    headers = [ 'libyasm.h', 'libyasm/assocdat.h', 'libyasm/bitvect.h' ]
+
+    syms = get_syms( ['yasm'], [YASM_DIR] )
+    def cb(trans_unit, node, *args):
+        name, file = node.name, node.file
+        # NOTE(review): every node is accepted; the filter on the
+        # library's symbols below is never reached -- confirm intent
+        return True
+        return name in syms
+    unit = WorkUnit(headers,modname,oname,False,mark_cb=cb,extradefs="",
+        CPPFLAGS=CPPFLAGS, CPP=CPP, **options)
+
+    unit.parse( False )
+    unit.transform(verbose=False, test_parse=False, test_types=False)
+    unit.output()
+
+def main():
+    " pass every key=value command line argument on to mk_tao "
+    options = {}
+    for arg in sys.argv[1:]:
+        if '=' not in arg:
+            continue
+        key,val = arg.split('=', 1)
+        options[key]=val
+    mk_tao(**options)
+
+if __name__=="__main__":
+ main()
+
+
+
+
diff --git a/tools/python-yasm/setup.py b/tools/python-yasm/setup.py
new file mode 100644
index 0000000..d4ce2f4
--- /dev/null
+++ b/tools/python-yasm/setup.py
@@ -0,0 +1,88 @@
+#! /usr/bin/env python
+# Build Python extension with configuration file input
+#
+# Copyright (C) 2006 Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+from os.path import basename, join, exists
+
+def ReadSetup(filename):
+    """ReadSetup goes through filename and parses out the values stored
+    in the file.  Values need to be stored in a
+    \"key=value format\"
+
+    Returns a dict mapping each key to the rest of its line after the
+    first '=', including the trailing newline.  Lines without an '='
+    (blank lines, for instance) are ignored."""
+    opts = {}
+    f = open(filename)
+    try:
+        for line in f:
+            if '=' not in line:
+                continue
+            key, value = line.split('=', 1)
+            opts[key] = value
+    finally:
+        f.close()
+    return opts
+
+def ParseCPPFlags(flags):
+    """split the CPPFlags macro into (include dirs, other flags)"""
+    incl_dir = []
+    cppflags = []
+    for flag in flags.split():
+        if flag.startswith("-I"):
+            incl_dir.append(flag[2:])
+        else:
+            cppflags.append(flag)
+    # always build with the internal interfaces enabled
+    cppflags.extend(["-DYASM_LIB_INTERNAL",
+                     "-DYASM_BC_INTERNAL",
+                     "-DYASM_EXPR_INTERNAL"])
+    return (incl_dir, cppflags)
+
+def ParseSources(src, srcdir):
+    """pick the .c files out of the Sources macro"""
+    sources = []
+    for fn in [tok for tok in src.split() if tok.endswith(".c")]:
+        # a file that is not in the current directory is taken
+        # from srcdir instead
+        if not exists(fn):
+            fn = join(srcdir, fn)
+        sources.append(fn)
+    return sources
+
+def RunSetup(incldir, cppflags, sources):
+    """run distutils setup() for the single 'yasm' extension module"""
+    yasm_ext = Extension('yasm',
+                         sources=sources,
+                         extra_compile_args=cppflags,
+                         include_dirs=incldir,
+                         )
+    setup(
+        name='yasm',
+        version='0.0',
+        description='Python bindings for Yasm',
+        author='Michael Urman, Peter Johnson',
+        url='http://www.tortall.net/projects/yasm',
+        ext_modules=[yasm_ext],
+        cmdclass = dict(build_ext=build_ext),
+    )
+
+if __name__ == "__main__":
+    # python-setup.txt supplies the keys includes, sources, srcdir and gcc
+    opts = ReadSetup("python-setup.txt")
+    incldir, cppflags = ParseCPPFlags(opts["includes"])
+    srcdir = opts["srcdir"].strip()
+    sources = ParseSources(opts["sources"], srcdir) + ['yasm_python.c']
+    if opts["gcc"].strip() == "yes":
+        cppflags.append('-w')
+    RunSetup(incldir, cppflags, sources)
+
diff --git a/tools/python-yasm/symrec.pxi b/tools/python-yasm/symrec.pxi
new file mode 100644
index 0000000..eb56ccf
--- /dev/null
+++ b/tools/python-yasm/symrec.pxi
@@ -0,0 +1,285 @@
+# Python bindings for Yasm: Pyrex input file for symrec.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+cdef class Symbol:
+    """Python view of one yasm_symrec.
+
+    Must be constructed from a PyCObject wrapping the symrec pointer;
+    any other argument raises NotImplementedError.
+    """
+    cdef yasm_symrec *sym
+
+    def __cinit__(self, symrec):
+        self.sym = NULL
+        if PyCObject_Check(symrec):
+            self.sym = <yasm_symrec *>__get_voidp(symrec, Symbol)
+        else:
+            raise NotImplementedError
+
+    # no deref or destroy necessary
+
+    property name:
+        def __get__(self): return yasm_symrec_get_name(self.sym)
+
+    property status:
+        # set of the names 'used', 'defined', 'valued' whose flag is set
+        def __get__(self):
+            cdef yasm_sym_status status
+            s = set()
+            status = yasm_symrec_get_status(self.sym)
+            if <int>status & <int>YASM_SYM_USED: s.add('used')
+            if <int>status & <int>YASM_SYM_DEFINED: s.add('defined')
+            if <int>status & <int>YASM_SYM_VALUED: s.add('valued')
+            return s
+
+    property in_table:
+        def __get__(self):
+            # the C status bit is the negative ("not in table"), so the
+            # property is true exactly when that bit is clear
+            return not (<int>yasm_symrec_get_status(self.sym) &
+                        <int>YASM_SYM_NOTINTABLE)
+
+    property visibility:
+        # set of the names 'global', 'common', 'extern', 'dlocal' whose
+        # flag is set
+        def __get__(self):
+            cdef yasm_sym_vis vis
+            s = set()
+            vis = yasm_symrec_get_visibility(self.sym)
+            if <int>vis & <int>YASM_SYM_GLOBAL: s.add('global')
+            if <int>vis & <int>YASM_SYM_COMMON: s.add('common')
+            if <int>vis & <int>YASM_SYM_EXTERN: s.add('extern')
+            if <int>vis & <int>YASM_SYM_DLOCAL: s.add('dlocal')
+            return s
+
+    property equ:
+        # a copy of the EQU expression; AttributeError when there is none
+        def __get__(self):
+            cdef yasm_expr *e
+            e = yasm_symrec_get_equ(self.sym)
+            if not e:
+                raise AttributeError("not an EQU")
+            return __make_expression(yasm_expr_copy(e))
+
+    property label:
+        # TODO: should return the Bytecode; currently None for a label
+        def __get__(self):
+            cdef yasm_symrec_get_label_bytecodep bc
+            if yasm_symrec_get_label(self.sym, &bc):
+                return None #Bytecode(bc)
+            else:
+                raise AttributeError("not a label or not defined")
+
+    property is_special:
+        def __get__(self): return bool(yasm_symrec_is_special(self.sym))
+
+    property is_curpos:
+        def __get__(self): return bool(yasm_symrec_is_curpos(self.sym))
+
+    def get_data(self): pass # TODO
+        #return <object>(yasm_symrec_get_data(self.sym, PyYasmAssocData))
+
+    def set_data(self, data): pass # TODO
+        #yasm_symrec_set_data(self.sym, PyYasmAssocData, data)
+
+#
+# Use associated data mechanism to keep Symbol reference paired with symrec.
+#
+
+# Destroy callback: releases the reference on the Python object stored
+# as associated data (the matching Py_INCREF is in __make_symbol).
+cdef void __python_symrec_cb_destroy(void *data):
+ Py_DECREF(<object>data)
+# Print callback: intentionally prints nothing.
+cdef void __python_symrec_cb_print(void *data, FILE *f, int indent_level):
+ pass
+# Callback pair under which Symbol objects are attached to symrecs.
+__python_symrec_cb = __assoc_data_callback(
+ PyCObject_FromVoidPtr(&__python_symrec_cb_destroy, NULL),
+ PyCObject_FromVoidPtr(&__python_symrec_cb_print, NULL))
+
+# Return the Symbol paired with symrec, creating and attaching one the
+# first time, so that later calls for the same symrec return the object
+# already stored as its associated data.
+cdef object __make_symbol(yasm_symrec *symrec):
+ cdef void *data
+ __error_check()
+ # look for a Symbol attached by an earlier call
+ data = yasm_symrec_get_data(symrec,
+ (<__assoc_data_callback>__python_symrec_cb).cb)
+ if data != NULL:
+ return <object>data
+ symbol = Symbol(__pass_voidp(symrec, Symbol))
+ yasm_symrec_add_data(symrec,
+ (<__assoc_data_callback>__python_symrec_cb).cb,
+ <void *>symbol)
+ Py_INCREF(symbol) # We're keeping a reference on the C side!
+ return symbol
+
+cdef class Bytecode
+cdef class SymbolTable
+
+# Iterator over the symbol names of a SymbolTable.
+cdef class SymbolTableKeyIterator:
+ cdef yasm_symtab_iter *iter
+
+ def __cinit__(self, symtab):
+ # only a SymbolTable is accepted; anything else raises TypeError
+ if not isinstance(symtab, SymbolTable):
+ raise TypeError
+ self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ # a NULL position marks the end of the iteration
+ if self.iter == NULL:
+ raise StopIteration
+ # fetch the name at the current position, then advance
+ rv = yasm_symrec_get_name(yasm_symtab_iter_value(self.iter))
+ self.iter = yasm_symtab_next(self.iter)
+ return rv
+
+# Iterator over the Symbol objects of a SymbolTable.
+cdef class SymbolTableValueIterator:
+ cdef yasm_symtab_iter *iter
+
+ def __cinit__(self, symtab):
+ # only a SymbolTable is accepted; anything else raises TypeError
+ if not isinstance(symtab, SymbolTable):
+ raise TypeError
+ self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ # a NULL position marks the end of the iteration
+ if self.iter == NULL:
+ raise StopIteration
+ # wrap the symrec at the current position, then advance
+ rv = __make_symbol(yasm_symtab_iter_value(self.iter))
+ self.iter = yasm_symtab_next(self.iter)
+ return rv
+
+# Iterator over the (name, Symbol) pairs of a SymbolTable.
+cdef class SymbolTableItemIterator:
+ cdef yasm_symtab_iter *iter
+
+ def __cinit__(self, symtab):
+ # only a SymbolTable is accepted; anything else raises TypeError
+ if not isinstance(symtab, SymbolTable):
+ raise TypeError
+ self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ cdef yasm_symrec *sym
+ # a NULL position marks the end of the iteration
+ if self.iter == NULL:
+ raise StopIteration
+ # build the pair for the current position, then advance
+ sym = yasm_symtab_iter_value(self.iter)
+ rv = (yasm_symrec_get_name(sym), __make_symbol(sym))
+ self.iter = yasm_symtab_next(self.iter)
+ return rv
+
+# Translate a visibility name into the matching YASM_SYM_* value.
+# A false value (None, '') or 'local' gives YASM_SYM_LOCAL.  Any other
+# unknown value sets ValueError and returns -1, which the `except -1`
+# clause declares as this function's error return.
+cdef int __parse_vis(vis) except -1:
+ if not vis or vis == 'local': return YASM_SYM_LOCAL
+ if vis == 'global': return YASM_SYM_GLOBAL
+ if vis == 'common': return YASM_SYM_COMMON
+ if vis == 'extern': return YASM_SYM_EXTERN
+ if vis == 'dlocal': return YASM_SYM_DLOCAL
+ msg = "bad visibility value %r" % vis
+ PyErr_SetString(ValueError, msg)
+ return -1
+
+cdef class SymbolTable:
+    """Owns a yasm_symtab and presents it as a read-only dictionary
+    mapping symbol names to Symbol objects.  New symbols are entered
+    with use/define_equ/define_label/define_special/declare, each of
+    which returns the Symbol concerned."""
+    cdef yasm_symtab *symtab
+
+    def __cinit__(self):
+        self.symtab = yasm_symtab_create()
+
+    def __dealloc__(self):
+        if self.symtab != NULL: yasm_symtab_destroy(self.symtab)
+
+    def use(self, name, line):
+        return __make_symbol(yasm_symtab_use(self.symtab, name, line))
+
+    def define_equ(self, name, expr, line):
+        # expr must be an Expression; a copy of it is handed to the C side
+        if not isinstance(expr, Expression):
+            raise TypeError
+        return __make_symbol(yasm_symtab_define_equ(self.symtab, name,
+                yasm_expr_copy((<Expression>expr).expr), line))
+
+    def define_label(self, name, precbc, in_table, line):
+        # precbc must be a Bytecode
+        if not isinstance(precbc, Bytecode):
+            raise TypeError
+        return __make_symbol(yasm_symtab_define_label(self.symtab, name,
+                (<Bytecode>precbc).bc, in_table, line))
+
+    def define_special(self, name, vis):
+        # vis is a visibility name, see __parse_vis
+        return __make_symbol(
+            yasm_symtab_define_special(self.symtab, name,
+                                       <yasm_sym_vis>__parse_vis(vis)))
+
+    def declare(self, name, vis, line):
+        # vis is a visibility name, see __parse_vis
+        return __make_symbol(
+            yasm_symtab_declare(self.symtab, name,
+                                <yasm_sym_vis>__parse_vis(vis), line))
+
+    #
+    # Methods to make SymbolTable behave like a dictionary of Symbols.
+    #
+
+    def __getitem__(self, key):
+        cdef yasm_symrec *symrec
+        symrec = yasm_symtab_get(self.symtab, key)
+        if symrec == NULL:
+            # report which name was missing, as a dict would
+            raise KeyError(key)
+        return __make_symbol(symrec)
+
+    def __contains__(self, key):
+        cdef yasm_symrec *symrec
+        symrec = yasm_symtab_get(self.symtab, key)
+        return symrec != NULL
+
+    def keys(self):
+        cdef yasm_symtab_iter *iter
+        l = []
+        iter = yasm_symtab_first(self.symtab)
+        while iter != NULL:
+            l.append(yasm_symrec_get_name(yasm_symtab_iter_value(iter)))
+            iter = yasm_symtab_next(iter)
+        return l
+
+    def values(self):
+        cdef yasm_symtab_iter *iter
+        l = []
+        iter = yasm_symtab_first(self.symtab)
+        while iter != NULL:
+            l.append(__make_symbol(yasm_symtab_iter_value(iter)))
+            iter = yasm_symtab_next(iter)
+        return l
+
+    def items(self):
+        cdef yasm_symtab_iter *iter
+        cdef yasm_symrec *sym
+        l = []
+        iter = yasm_symtab_first(self.symtab)
+        while iter != NULL:
+            sym = yasm_symtab_iter_value(iter)
+            l.append((yasm_symrec_get_name(sym), __make_symbol(sym)))
+            iter = yasm_symtab_next(iter)
+        return l
+
+    def has_key(self, key):
+        cdef yasm_symrec *symrec
+        symrec = yasm_symtab_get(self.symtab, key)
+        return symrec != NULL
+
+    def get(self, key, x=None):
+        # x is returned when key is unknown; it defaults to None as
+        # with dict.get
+        cdef yasm_symrec *symrec
+        symrec = yasm_symtab_get(self.symtab, key)
+        if symrec == NULL:
+            return x
+        return __make_symbol(symrec)
+
+    def iterkeys(self): return SymbolTableKeyIterator(self)
+    def itervalues(self): return SymbolTableValueIterator(self)
+    def iteritems(self): return SymbolTableItemIterator(self)
+    def __iter__(self): return SymbolTableKeyIterator(self)
+
diff --git a/tools/python-yasm/tests/Makefile.inc b/tools/python-yasm/tests/Makefile.inc
new file mode 100644
index 0000000..c6df22c
--- /dev/null
+++ b/tools/python-yasm/tests/Makefile.inc
@@ -0,0 +1,13 @@
+ # Test driver script and Python test modules for the python-yasm bindings.
+ # They are added to EXTRA_DIST outside the conditional below, i.e. whether
+ # or not the bindings are enabled.
+ EXTRA_DIST += tools/python-yasm/tests/python_test.sh
+ EXTRA_DIST += tools/python-yasm/tests/__init__.py
+ EXTRA_DIST += tools/python-yasm/tests/test_bytecode.py
+ EXTRA_DIST += tools/python-yasm/tests/test_expr.py
+ EXTRA_DIST += tools/python-yasm/tests/test_intnum.py
+ EXTRA_DIST += tools/python-yasm/tests/test_symrec.py
+
+ # The test driver is only added to TESTS when HAVE_PYTHON_BINDINGS is set.
+ if HAVE_PYTHON_BINDINGS
+
+ # python_test.sh reads ${PYTHON} to pick the interpreter.
+ TESTS_ENVIRONMENT += PYTHON=${PYTHON}
+ TESTS += tools/python-yasm/tests/python_test.sh
+
+ endif
diff --git a/tools/python-yasm/tests/__init__.py b/tools/python-yasm/tests/__init__.py
new file mode 100644
index 0000000..f5afb30
--- /dev/null
+++ b/tools/python-yasm/tests/__init__.py
@@ -0,0 +1,69 @@
+# Test wrapper from Quod Libet
+# http://www.sacredchao.net/quodlibet/
+ import unittest, sys
+ # Registry of TestCase classes; unit() below runs the classes stored here.
+ suites = []
+ # Test modules register a class with add(SomeTestCase); registerCase is an
+ # alias for the same bound list.append.
+ add = registerCase = suites.append
+ # Re-exported so test modules can write `from tests import TestCase, add`.
+ from unittest import TestCase
+
+class Mock(object):
+ # A generic mocking object.
+ def __init__(self, **kwargs): self.__dict__.update(kwargs)
+
+ # Importing the test modules registers their TestCase classes via add().
+ # These imports must stay below the definitions of `add` and `TestCase`,
+ # because the test modules import those names from this package.
+ import test_intnum
+ import test_symrec
+ import test_bytecode
+ import test_expr
+
+class Result(unittest.TestResult):
+
+ separator1 = '=' * 70
+ separator2 = '-' * 70
+
+ def addSuccess(self, test):
+ unittest.TestResult.addSuccess(self, test)
+ sys.stdout.write('.')
+
+ def addError(self, test, err):
+ unittest.TestResult.addError(self, test, err)
+ sys.stdout.write('E')
+
+ def addFailure(self, test, err):
+ unittest.TestResult.addFailure(self, test, err)
+ sys.stdout.write('F')
+
+ def printErrors(self):
+ succ = self.testsRun - (len(self.errors) + len(self.failures))
+ v = "%3d" % succ
+ count = 50 - self.testsRun
+ sys.stdout.write((" " * count) + v + "\n")
+ self.printErrorList('ERROR', self.errors)
+ self.printErrorList('FAIL', self.failures)
+
+ def printErrorList(self, flavour, errors):
+ for test, err in errors:
+ sys.stdout.write(self.separator1 + "\n")
+ sys.stdout.write("%s: %s\n" % (flavour, str(test)))
+ sys.stdout.write(self.separator2 + "\n")
+ sys.stdout.write("%s\n" % err)
+
+class Runner:
+ def run(self, test):
+ suite = unittest.makeSuite(test)
+ pref = '%s (%d): ' % (test.__name__, len(suite._tests))
+ print pref + " " * (25 - len(pref)),
+ result = Result()
+ suite(result)
+ result.printErrors()
+ return bool(result.failures + result.errors)
+
+def unit(run = []):
+ runner = Runner()
+ failures = False
+ for test in suites:
+ if not run or test.__name__ in run:
+ failures |= runner.run(test)
+ return failures
+
+ # When executed as a script, the command-line arguments are passed to
+ # unit() as the names of the test classes to run, and unit()'s result
+ # becomes the argument of SystemExit.
+ if __name__ == "__main__":
+ raise SystemExit(unit(sys.argv[1:]))
+
diff --git a/tools/python-yasm/tests/python_test.sh b/tools/python-yasm/tests/python_test.sh
new file mode 100755
index 0000000..18e7a69
--- /dev/null
+++ b/tools/python-yasm/tests/python_test.sh
@@ -0,0 +1,19 @@
+ #!/bin/sh
+ # Based on _sanity.sh from Quod Libet
+ # http://www.sacredchao.net/quodlibet/
+ #
+ # Usage: python_test.sh --sanity | [TestName] ...
+ #   --sanity   run static checks on the test sources
+ #   TestName   run only the named test classes (default: all of them)
+ #
+ # Environment: srcdir (default ".") and PYTHON (default "python").
+
+ set -e
+
+ test -n "${srcdir}" || srcdir=.
+ test -n "${PYTHON}" || PYTHON=python
+
+ if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
+     echo "Usage: $0 --sanity | [TestName] ..."
+     exit 0
+ elif [ "$1" = "--sanity" ]; then
+     echo "Running static sanity checks."
+     # grep exits 0 when it FINDS the suspicious pattern and 1 when the
+     # sources are clean.  Test it explicitly so that a match fails the
+     # check and a clean tree does not trip "set -e".
+     if grep "except None:" "${srcdir}"/tools/python-yasm/tests/*.py; then
+         exit 1
+     fi
+ else
+     ${PYTHON} -c "import sys; import glob; sys.path.insert(0, '${srcdir}/tools/python-yasm'); sys.path.insert(0, glob.glob('build/lib.*')[0]); import tests; raise SystemExit(tests.unit('$*'.split()))"
+ fi
+
diff --git a/tools/python-yasm/tests/test_bytecode.py b/tools/python-yasm/tests/test_bytecode.py
new file mode 100644
index 0000000..eb0e96d
--- /dev/null
+++ b/tools/python-yasm/tests/test_bytecode.py
@@ -0,0 +1,3 @@
+from tests import TestCase, add
+from yasm import Bytecode, Expression
+
diff --git a/tools/python-yasm/tests/test_expr.py b/tools/python-yasm/tests/test_expr.py
new file mode 100644
index 0000000..97b021c
--- /dev/null
+++ b/tools/python-yasm/tests/test_expr.py
@@ -0,0 +1,18 @@
+from tests import TestCase, add
+from yasm import Expression
+import operator
+
+class TExpression(TestCase):
+ def test_create(self):
+ e1 = Expression(operator.add, 1, 2)
+ e2 = Expression('+', 1, 2)
+
+ self.assertEquals(e1.get_intnum(), e1.get_intnum())
+
+ def test_extract(self):
+ e1 = Expression('/', 15, 5)
+ self.assertEquals(e1.get_intnum(), 3)
+ self.assertRaises(ValueError, e1.extract_segoff)
+ self.assertRaises(ValueError, e1.extract_wrt)
+
+add(TExpression)
diff --git a/tools/python-yasm/tests/test_intnum.py b/tools/python-yasm/tests/test_intnum.py
new file mode 100644
index 0000000..a222018
--- /dev/null
+++ b/tools/python-yasm/tests/test_intnum.py
@@ -0,0 +1,77 @@
+from tests import TestCase, add
+from yasm import IntNum
+
+class TIntNum(TestCase):
+ legal_values = [
+ 0, 1, -1, 2, -2, 17, -17,
+ 2**31-1, -2**31, 2**31, 2**32-1, -2**32,
+ 2**63-1, -2**63-1, 2**63, 2**64, -2**64,
+ 2**127-1, -2**127
+ ]
+ overflow_values = [
+ 2**127, -2**127-1
+ ]
+
+ def test_to_from(self):
+ for i in self.legal_values:
+ self.assertEquals(i, int(IntNum(i)))
+ self.assertEquals(i, long(IntNum(i)))
+
+ def test_overflow(self):
+ for i in self.overflow_values:
+ self.assertRaises(OverflowError, IntNum, i)
+
+ str_values = [
+ "0", "00000", "1234", "87654321", "010101010", "FADCBEEF"
+ ]
+ base_values = [2, 8, 10, 12, 16, None, "nasm", "foo"]
+
+ def test_from_str(self):
+ pass
+
+ def test_from_str_base(self):
+ pass
+
+ def test_exceptions(self):
+ self.assertRaises(ZeroDivisionError, IntNum(1).__div__, 0)
+
+ IntNum(1) / 1 # make sure the above error is cleared
+
+ try: IntNum(1) / 0
+ except ZeroDivisionError, err:
+ self.assertEquals('divide by zero', str(err))
+
+ def test_xor(self):
+ a = IntNum(-234)
+ b = IntNum(432)
+ c = a ^ b
+ self.assertEquals(a, -234)
+ self.assertEquals(b, 432)
+ self.assertEquals(c, -234 ^ 432)
+
+ def test_ixor(self):
+ a = IntNum(-234)
+ b = IntNum(432)
+ a ^= b; b ^= a; a ^= b
+ self.assertEquals(a, 432)
+ self.assertEquals(b, -234)
+
+ def test_cmp(self):
+ a = IntNum(-1)
+ b = IntNum(0)
+ c = IntNum(1)
+ self.assert_(a < b < c)
+ self.assert_(a <= b <= c)
+ self.assert_(c >= b >= a)
+ self.assert_(c > b > a)
+ self.assert_(a != b != c)
+
+ def test_abs(self):
+ a = IntNum(-1)
+ b = IntNum(0)
+ c = IntNum(1)
+
+ self.assertEquals(abs(a), abs(c))
+ self.assertEquals(abs(a) - abs(c), abs(b))
+
+add(TIntNum)
diff --git a/tools/python-yasm/tests/test_symrec.py b/tools/python-yasm/tests/test_symrec.py
new file mode 100644
index 0000000..a575b19
--- /dev/null
+++ b/tools/python-yasm/tests/test_symrec.py
@@ -0,0 +1,80 @@
+from tests import TestCase, add
+from yasm import SymbolTable, Expression, YasmError
+
+class TSymbolTable(TestCase):
+ def setUp(self):
+ self.symtab = SymbolTable()
+
+ def test_keys(self):
+ self.assertEquals(len(self.symtab.keys()), 0)
+ self.symtab.declare("foo", None, 0)
+ keys = self.symtab.keys()
+ self.assertEquals(len(keys), 1)
+ self.assertEquals(keys[0], "foo")
+
+ def test_contains(self):
+ self.assert_("foo" not in self.symtab)
+ self.symtab.declare("foo", None, 0)
+ self.assert_("foo" in self.symtab)
+
+ def test_exception(self):
+ expr = Expression('+', 1, 2)
+ self.symtab.define_equ("foo", expr, 0)
+ self.assertRaises(YasmError, self.symtab.define_equ, "foo", expr, 0)
+ self.symtab.define_equ("bar", expr, 0) # cleared
+ self.assertRaises(YasmError, self.symtab.define_special, "bar",
+ 'global')
+
+ def test_iters(self):
+ tab = self.symtab
+ tab.declare("foo", None, 0)
+ tab.declare("bar", None, 0)
+ tab.declare("baz", None, 0)
+
+ # while ordering is not known, it must be consistent
+ self.assertEquals(list(tab.keys()), list(tab.iterkeys()))
+ self.assertEquals(list(tab.values()), list(tab.itervalues()))
+ self.assertEquals(list(tab.items()), list(tab.iteritems()))
+ self.assertEquals(list(tab.iteritems()), zip(tab.keys(), tab.values()))
+
+add(TSymbolTable)
+
+class TSymbolAttr(TestCase):
+ def setUp(self):
+ self.symtab = SymbolTable()
+ self.declsym = self.symtab.declare("foo", None, 0)
+
+ def test_visibility(self):
+ sym = self.symtab.declare("local1", None, 0)
+ self.assertEquals(sym.visibility, set())
+ sym = self.symtab.declare("local2", '', 0)
+ self.assertEquals(sym.visibility, set())
+ sym = self.symtab.declare("local3", 'local', 0)
+ self.assertEquals(sym.visibility, set())
+ sym = self.symtab.declare("global", 'global', 0)
+ self.assertEquals(sym.visibility, set(['global']))
+ sym = self.symtab.declare("common", 'common', 0)
+ self.assertEquals(sym.visibility, set(['common']))
+ sym = self.symtab.declare("extern", 'extern', 0)
+ self.assertEquals(sym.visibility, set(['extern']))
+ sym = self.symtab.declare("dlocal", 'dlocal', 0)
+ self.assertEquals(sym.visibility, set(['dlocal']))
+
+ self.assertRaises(ValueError,
+ lambda: self.symtab.declare("extern2", 'foo', 0))
+ def test_name(self):
+ self.assertEquals(self.declsym.name, "foo")
+
+ def test_equ(self):
+ self.assertRaises(AttributeError, lambda: self.declsym.equ)
+
+ def test_label(self):
+ self.assertRaises(AttributeError, lambda: self.declsym.label)
+
+ def test_is_special(self):
+ self.assertEquals(self.declsym.is_special, False)
+
+ def test_is_curpos(self):
+ self.assertEquals(self.declsym.is_curpos, False)
+
+add(TSymbolAttr)
diff --git a/tools/python-yasm/value.pxi b/tools/python-yasm/value.pxi
new file mode 100644
index 0000000..5d78c05
--- /dev/null
+++ b/tools/python-yasm/value.pxi
@@ -0,0 +1,56 @@
+# Python bindings for Yasm: Pyrex input file for value.h
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+ cdef class Value:
+     """Wrapper around a yasm_value.
+
+     Value(value=None, size=None) builds the value from an Expression
+     (which is copied with yasm_expr_copy), from a Symbol, or from nothing
+     at all; `size` is passed to the yasm initializers and defaults to 0.
+     Any other `value` type raises TypeError.
+     """
+     cdef yasm_value value
+
+     def __cinit__(self, value=None, size=None):
+         cdef unsigned int sz
+         sz = 0
+         if size is not None:
+             sz = size
+
+         # Initialize to an empty value first so that self.value has been
+         # set up on every path, including the TypeError one below.
+         yasm_value_initialize(&self.value, NULL, sz)
+         if isinstance(value, Expression):
+             yasm_value_initialize(&self.value,
+                                   yasm_expr_copy((<Expression>value).expr), sz)
+         elif isinstance(value, Symbol):
+             yasm_value_init_sym(&self.value, (<Symbol>value).sym, sz)
+         elif value is not None:
+             raise TypeError("Invalid value type '%s'" % type(value))
+
+     def __dealloc__(self):
+         yasm_value_delete(&self.value)
+
+     def finalize(self, precbc=None):
+         """Call yasm_value_finalize on the wrapped value and return its
+         result.
+
+         `precbc` is either None (NULL is passed) or a Bytecode, whose
+         underlying bc pointer is passed; anything else raises TypeError.
+         """
+         if precbc is None:
+             return yasm_value_finalize(&self.value, NULL)
+         if not isinstance(precbc, Bytecode):
+             raise TypeError("Invalid precbc type '%s'" % type(precbc))
+         return yasm_value_finalize(&self.value, (<Bytecode>precbc).bc)
+
diff --git a/tools/python-yasm/yasm.pyx b/tools/python-yasm/yasm.pyx
new file mode 100644
index 0000000..adbc734
--- /dev/null
+++ b/tools/python-yasm/yasm.pyx
@@ -0,0 +1,137 @@
+# Python bindings for Yasm: Main Pyrex input file
+#
+# Copyright (C) 2006 Michael Urman, Peter Johnson
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+"""Interface to the Yasm library.
+
+The Yasm library (aka libyasm) provides the core functionality of the Yasm
+assembler. Classes in this library provide for manipulation of machine
+instructions and object file constructs such as symbol tables and sections.
+
+Expression objects encapsulate complex expressions containing registers,
+symbols, and operations such as SEG.
+
+Bytecode objects encapsulate data or code objects such as data, reserve,
+align, or instructions.
+
+Section objects encapsulate an object file section, including the section
+name, any Bytecode objects contained within that section, and other
+information.
+
+"""
+
+ # CPython C-API functions used by the bindings, declared here so they
+ # can be called directly from this module and its included .pxi files.
+ cdef extern from "Python.h":
+ # CObject helpers: wrap raw C pointers in Python objects and unwrap them.
+ cdef object PyCObject_FromVoidPtr(void *cobj, void (*destr)(void *))
+ cdef object PyCObject_FromVoidPtrAndDesc(void *cobj, void *desc,
+ void (*destr)(void *, void *))
+ cdef int PyType_Check(object)
+ cdef int PyCObject_Check(object)
+ cdef void *PyCObject_AsVoidPtr(object)
+ cdef void *PyCObject_GetDesc(object)
+
+ # Underscore-prefixed CPython helpers converting between Python longs
+ # and raw byte arrays.
+ cdef object _PyLong_FromByteArray(unsigned char *bytes, unsigned int n,
+ int little_endian, int is_signed)
+ cdef int _PyLong_AsByteArray(object v, unsigned char *bytes, unsigned int n,
+ int little_endian, int is_signed) except -1
+
+ # Manual reference counting.
+ cdef void Py_INCREF(object o)
+ cdef void Py_DECREF(object o)
+
+ # Setting a Python exception from C-level code.
+ cdef void PyErr_SetString(object type, char *message)
+ cdef object PyErr_Format(object type, char *format, ...)
+
+ # C heap allocation, used by __assoc_data_callback below.
+ # NOTE(review): malloc is declared here as taking an int, whereas the C
+ # prototype takes a size_t — confirm this is acceptable for the sizes used.
+ cdef extern from "stdlib.h":
+ cdef void *malloc(int n)
+ cdef void free(void *p)
+
+include "_yasm.pxi"
+
+ cdef object __pass_voidp(void *obj, object forclass):
+     # Wrap the raw pointer `obj` in a CObject whose description slot holds
+     # `forclass`, so that __get_voidp can later check which class the
+     # pointer was meant for.  No destructor is attached (NULL).
+     # NOTE(review): `forclass` is stored as a bare pointer without an
+     # INCREF — presumably the class object outlives the CObject; confirm.
+     cdef void *tag
+     tag = <void *>forclass
+     return PyCObject_FromVoidPtrAndDesc(obj, tag, NULL)
+
+ cdef void *__get_voidp(object obj, object forclass) except NULL:
+     # Unwrap a CObject created by __pass_voidp and return its pointer.
+     #
+     # `obj` must be a CObject whose description slot is `forclass`;
+     # otherwise a TypeError is set and NULL is returned (the function is
+     # declared `except NULL`, so the error propagates to the caller).
+     cdef void* desc
+
+     if not PyCObject_Check(obj):
+         # Wrap obj in a 1-tuple: if obj is itself a tuple, a bare
+         # `% obj` would unpack it as the format arguments instead of
+         # formatting it as a single value.
+         msg = "obj %r is not a CObject" % (obj,)
+         PyErr_SetString(TypeError, msg)
+         return NULL
+
+     desc = PyCObject_GetDesc(obj)
+
+     if desc != <void *>forclass:
+         if desc == NULL:
+             msg = "CObject type is not set (expecting %s)" % (forclass,)
+         elif PyType_Check(<object>desc):
+             msg = "CObject is for %s not %s" % (<object>desc, forclass)
+         else:
+             msg = "CObject is incorrect (expecting %s)" % (forclass,)
+         PyErr_SetString(TypeError, msg)
+         return NULL
+
+     return PyCObject_AsVoidPtr(obj)
+
+#
+# Link to associated data mechanism to keep Python references paired with
+# yasm objects.
+#
+ cdef class __assoc_data_callback:
+     # Owns a heap-allocated yasm_assoc_data_callback structure.
+     #
+     # `destroy` is a CObject holding the C destroy function pointer.
+     # `print_` is accepted but currently unused (see the disabled line
+     # below).  The structure is released in __dealloc__.
+     cdef yasm_assoc_data_callback *cb
+     def __cinit__(self, destroy, print_):
+         self.cb = <yasm_assoc_data_callback *>malloc(sizeof(yasm_assoc_data_callback))
+         if self.cb == NULL:
+             # Fail cleanly instead of writing through a NULL pointer.
+             raise MemoryError()
+         self.cb.destroy = <void (*) (void *)>PyCObject_AsVoidPtr(destroy)
+         #self.cb.print_ = <void (*) (void *, FILE *, int)>PyCObject_AsVoidPtr(print_)
+     def __dealloc__(self):
+         # free(NULL) is a no-op, so this is safe even if allocation failed.
+         free(self.cb)
+
+
+ # Thin wrapper storing a register as an unsigned long.
+ # NOTE(review): presumably `reg` is a yasm register identifier — confirm
+ # against the code that constructs Register objects.
+ cdef class Register:
+ cdef unsigned long reg
+ def __cinit__(self, reg):
+ self.reg = reg
+
+include "errwarn.pxi"
+include "intnum.pxi"
+include "floatnum.pxi"
+include "expr.pxi"
+include "symrec.pxi"
+include "value.pxi"
+
+include "bytecode.pxi"
+
+ # One-time library setup, called once when this module is imported:
+ # boots BitVector, then initializes the intnum, floatnum and errwarn
+ # subsystems, in that order.
+ cdef __initialize():
+ BitVector_Boot()
+ yasm_intnum_initialize()
+ yasm_floatnum_initialize()
+ yasm_errwarn_initialize()
+
+ # Counterpart of __initialize(), registered with atexit below: cleans up
+ # floatnum, intnum and errwarn, then shuts BitVector down last.
+ def __cleanup():
+ yasm_floatnum_cleanup()
+ yasm_intnum_cleanup()
+ yasm_errwarn_cleanup()
+ BitVector_Shutdown()
+
+ # Initialize the library as soon as the module is imported, and arrange
+ # for the matching cleanup to run at interpreter exit.
+ __initialize()
+ import atexit
+ atexit.register(__cleanup)
+
diff --git a/tools/re2c/CHANGELOG b/tools/re2c/CHANGELOG
new file mode 100644
index 0000000..e3dfd5a
--- /dev/null
+++ b/tools/re2c/CHANGELOG
@@ -0,0 +1,22 @@
+re2c
+----
+
+YASM version
+------------
+- translated to C from C++ for portability reasons
+
+Version 0.9.1
+-------------
+
+- removed rcs comments in source files
+
+Version 0.9
+-----------
+
+- redistribution based on version 0.5
+- added parentheses to assignment expressions in 'if' statements
+- rearranged class members to match initialization order
+- substr fix
+- use array delete [] when necessary
+- other minor fixes for subduing compiler warnings
+
diff --git a/tools/re2c/Makefile.inc b/tools/re2c/Makefile.inc
new file mode 100644
index 0000000..edb89a5
--- /dev/null
+++ b/tools/re2c/Makefile.inc
@@ -0,0 +1,93 @@
+ # These utility programs have to be built for the BUILD host when
+ # cross-compiling, which makes the rules below rather non-standard automake.
+
+ noinst_PROGRAMS += re2c
+
+ # re2c_SOURCES is deliberately empty: the objects are produced by the
+ # explicit $(CC_FOR_BUILD) rules further down and passed to the link via
+ # re2c_LDADD.  The source files are therefore listed in EXTRA_DIST.
+ re2c_SOURCES =
+ EXTRA_DIST += tools/re2c/main.c
+ EXTRA_DIST += tools/re2c/basics.h
+ EXTRA_DIST += tools/re2c/globals.h
+ EXTRA_DIST += tools/re2c/ins.h
+ EXTRA_DIST += tools/re2c/re.h
+ EXTRA_DIST += tools/re2c/token.h
+ EXTRA_DIST += tools/re2c/code.c
+ EXTRA_DIST += tools/re2c/dfa.h
+ EXTRA_DIST += tools/re2c/dfa.c
+ EXTRA_DIST += tools/re2c/parse.h
+ EXTRA_DIST += tools/re2c/parser.h
+ EXTRA_DIST += tools/re2c/parser.c
+ EXTRA_DIST += tools/re2c/actions.c
+ EXTRA_DIST += tools/re2c/scanner.h
+ EXTRA_DIST += tools/re2c/scanner.c
+ EXTRA_DIST += tools/re2c/mbo_getopt.h
+ EXTRA_DIST += tools/re2c/mbo_getopt.c
+ EXTRA_DIST += tools/re2c/substr.h
+ EXTRA_DIST += tools/re2c/substr.c
+ EXTRA_DIST += tools/re2c/translate.c
+ # Objects linked into re2c; each has its own compile rule below.
+ re2c_LDADD = re2c-main.$(OBJEXT)
+ re2c_LDADD += re2c-code.$(OBJEXT)
+ re2c_LDADD += re2c-dfa.$(OBJEXT)
+ re2c_LDADD += re2c-parser.$(OBJEXT)
+ re2c_LDADD += re2c-actions.$(OBJEXT)
+ re2c_LDADD += re2c-scanner.$(OBJEXT)
+ re2c_LDADD += re2c-mbo_getopt.$(OBJEXT)
+ re2c_LDADD += re2c-substr.$(OBJEXT)
+ re2c_LDADD += re2c-translate.$(OBJEXT)
+ # Link with the build-host linker driver.
+ re2c_LINK = $(CCLD_FOR_BUILD) -o $@
+
+ # One explicit compile rule per object, using the build-host compiler and
+ # flags.  The `test -f FILE || echo '$(srcdir)/'` prefix uses FILE from
+ # the current directory when it exists there and from $(srcdir) otherwise.
+ re2c-main.$(OBJEXT): tools/re2c/main.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/main.c || echo '$(srcdir)/'`tools/re2c/main.c
+
+ re2c-code.$(OBJEXT): tools/re2c/code.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/code.c || echo '$(srcdir)/'`tools/re2c/code.c
+
+ re2c-dfa.$(OBJEXT): tools/re2c/dfa.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c
+
+ re2c-parser.$(OBJEXT): tools/re2c/parser.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c
+
+ re2c-actions.$(OBJEXT): tools/re2c/actions.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c
+
+ re2c-scanner.$(OBJEXT): tools/re2c/scanner.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/scanner.c || echo '$(srcdir)/'`tools/re2c/scanner.c
+
+ re2c-mbo_getopt.$(OBJEXT): tools/re2c/mbo_getopt.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/mbo_getopt.c || echo '$(srcdir)/'`tools/re2c/mbo_getopt.c
+
+ re2c-substr.$(OBJEXT): tools/re2c/substr.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/substr.c || echo '$(srcdir)/'`tools/re2c/substr.c
+
+ re2c-translate.$(OBJEXT): tools/re2c/translate.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c
+
+EXTRA_DIST += tools/re2c/CHANGELOG
+EXTRA_DIST += tools/re2c/NO_WARRANTY
+EXTRA_DIST += tools/re2c/README
+EXTRA_DIST += tools/re2c/scanner.re
+EXTRA_DIST += tools/re2c/re2c.1
+EXTRA_DIST += tools/re2c/bootstrap/scanner.c
+EXTRA_DIST += tools/re2c/doc/loplas.ps.gz
+EXTRA_DIST += tools/re2c/doc/sample.bib
+EXTRA_DIST += tools/re2c/examples/basemmap.c
+EXTRA_DIST += tools/re2c/examples/c.re
+EXTRA_DIST += tools/re2c/examples/cmmap.re
+EXTRA_DIST += tools/re2c/examples/cnokw.re
+EXTRA_DIST += tools/re2c/examples/cunroll.re
+EXTRA_DIST += tools/re2c/examples/modula.re
+EXTRA_DIST += tools/re2c/examples/repeater.re
+EXTRA_DIST += tools/re2c/examples/sample.re
+EXTRA_DIST += tools/re2c/examples/simple.re
+EXTRA_DIST += tools/re2c/examples/rexx/README
+EXTRA_DIST += tools/re2c/examples/rexx/rexx.l
+EXTRA_DIST += tools/re2c/examples/rexx/scanio.c
diff --git a/tools/re2c/NO_WARRANTY b/tools/re2c/NO_WARRANTY
new file mode 100644
index 0000000..885a13d
--- /dev/null
+++ b/tools/re2c/NO_WARRANTY
@@ -0,0 +1,2 @@
+re2c is distributed with no warranty whatever. The author and any other
+contributors take no responsibility for the consequences of its use.
diff --git a/tools/re2c/README b/tools/re2c/README
new file mode 100644
index 0000000..943120f
--- /dev/null
+++ b/tools/re2c/README
@@ -0,0 +1,153 @@
+re2c
+----
+
+Version 0.9.1
+Originally written by Peter Bumbulis (peterr@csg.uwaterloo.ca)
+Currently maintained by Brian Young (bayoung@acm.org)
+
+The re2c distribution can be found at:
+
+ http://www.tildeslash.org/re2c/index.html
+
+The source distribution is available from:
+
+ http://www.tildeslash.org/re2c/re2c-0.9.1.tar.gz
+
+This distribution is a cleaned up version of the 0.5 release
+maintained by me (Brian Young). Several bugs were fixed as well
+ as code cleanup for warning-free compilation. It has been developed
+and tested with egcs 1.0.2 and gcc 2.7.2.3 on Linux x86. Peter
+Bumbulis' original release can be found at:
+
+ ftp://csg.uwaterloo.ca/pub/peterr/re2c.0.5.tar.gz
+
+re2c is a great tool for writing fast and flexible lexers. It has
+served many people well for many years and it deserves to be
+maintained more actively. re2c is on the order of 2-3 times faster
+than a flex based scanner, and its input model is much more
+flexible.
+
+Patches and requests for features will be entertained. Areas of
+particular interest to me are porting (a Solaris and an NT
+version will be forthcoming) and wide character support. Note
+that the code is already quite portable and should be buildable
+on any platform with minor makefile changes.
+
+ Peter's original version 0.5 ANNOUNCE and README follow.
+
+Brian
+
+--
+
+re2c is a tool for generating C-based recognizers from regular
+expressions. re2c-based scanners are efficient: for programming
+languages, given similar specifications, an re2c-based scanner is
+typically almost twice as fast as a flex-based scanner with little or no
+increase in size (possibly a decrease on cisc architectures). Indeed,
+re2c-based scanners are quite competitive with hand-crafted ones.
+
+Unlike flex, re2c does not generate complete scanners: the user must
+supply some interface code. While this code is not bulky (about 50-100
+lines for a flex-like scanner; see the man page and examples in the
+distribution) careful coding is required for efficiency (and
+correctness). One advantage of this arrangement is that the generated
+code is not tied to any particular input model. For example, re2c
+generated code can be used to scan data from a null-byte terminated
+buffer as illustrated below.
+
+Given the following source
+
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ /*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+ */
+ }
+
+re2c will generate
+
+ /* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+ #line 1 "simple.re"
+ #define NULL ((char*) 0)
+ char *scan(char *p){
+ char *q;
+ #define YYCTYPE char
+ #define YYCURSOR p
+ #define YYLIMIT p
+ #define YYMARKER q
+ #define YYFILL(n)
+ {
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ yy1: ++YYCURSOR;
+ yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+ yy2: yych = *++YYCURSOR;
+ goto yy7;
+ yy3:
+ #line 10
+ {return YYCURSOR;}
+ yy4: yych = *++YYCURSOR;
+ yy5:
+ #line 11
+ {return NULL;}
+ yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+ }
+ #line 12
+
+ }
+
+Note that most compilers will perform dead-code elimination to remove
+ all YYCURSOR, YYLIMIT comparisons.
+
+re2c was developed for a particular project (constructing a fast REXX
+scanner of all things!) and so while it has some rough edges, it should
+be quite usable. More information about re2c can be found in the
+(admittedly skimpy) man page; the algorithms and heuristics used are
+described in an upcoming LOPLAS article (included in the distribution).
+Probably the best way to find out more about re2c is to try the supplied
+examples. re2c is written in C++, and is currently being developed
+under Linux using gcc 2.5.8.
+
+Peter
+
+--
+
+re2c is distributed with no warranty whatever. The code is certain to
+contain errors. Neither the author nor any contributor takes
+responsibility for any consequences of its use.
+
+re2c is in the public domain. The data structures and algorithms used
+in re2c are all either taken from documents available to the general
+public or are inventions of the author. Programs generated by re2c may
+be distributed freely. re2c itself may be distributed freely, in source
+or binary, unchanged or modified. Distributors may charge whatever fees
+they can obtain for re2c.
+
+ If you do make use of re2c, or incorporate it into a larger project, an
+acknowledgement somewhere (documentation, research report, etc.) would
+be appreciated.
+
+Please send bug reports and feedback (including suggestions for
+improving the distribution) to
+
+ peterr@csg.uwaterloo.ca
+
+Include a small example and the banner from parser.y with bug reports.
+
diff --git a/tools/re2c/actions.c b/tools/re2c/actions.c
new file mode 100644
index 0000000..3eaade0
--- /dev/null
+++ b/tools/re2c/actions.c
@@ -0,0 +1,692 @@
+#include <time.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/dfa.h"
+
+static Symbol *first = NULL;
+
+void
+Symbol_init(Symbol *r, const SubStr *str)
+{
+ r->next = first;
+ Str_init(&r->name, str);
+ r->re = NULL;
+ first = r;
+}
+
+Symbol *
+Symbol_find(const SubStr *str)
+{
+ Symbol *sym;
+ for(sym = first; sym; sym = sym->next)
+ if(SubStr_eq(&sym->name, str)) return sym;
+ return Symbol_new(str);
+}
+
+/*
+void showIns(FILE *o, const Ins *i, const Ins *base){
+ o.width(3);
+ o << &i - &base << ": ";
+ switch(i.i.tag){
+ case CHAR: {
+ o << "match ";
+ for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j)
+ prtCh(o, j->c.value);
+ break;
+ } case GOTO:
+ o << "goto " << ((Ins*) i.i.link - &base);
+ break;
+ case FORK:
+ o << "fork " << ((Ins*) i.i.link - &base);
+ break;
+ case CTXT:
+ o << "term " << ((RuleOp*) i.i.link)->accept;
+ break;
+ case TERM:
+ o << "term " << ((RuleOp*) i.i.link)->accept;
+ break;
+ }
+ o << "\n";
+}
+*/
+
+/* Fixed length of an alternation: both branches must agree, else ~0u. */
+static unsigned int
+AltOp_fixedLength(RegExp *r)
+{
+ unsigned int l1 = RegExp_fixedLength(r->d.AltCatOp.exp1);
+ unsigned int l2 = RegExp_fixedLength(r->d.AltCatOp.exp2);
+ if(l1 != l2 || l1 == ~0u)
+ return ~0u;
+ return l1;
+}
+
+static unsigned int
+CatOp_fixedLength(RegExp *r)
+{
+ unsigned int l1, l2;
+ if((l1 = RegExp_fixedLength(r->d.AltCatOp.exp1)) != ~0u )
+ if((l2 = RegExp_fixedLength(r->d.AltCatOp.exp2)) != ~0u)
+ return l1+l2;
+ return ~0u;
+}
+
+unsigned int
+RegExp_fixedLength(RegExp *r)
+{
+ switch (r->type) {
+ case NULLOP:
+ return 0;
+ case MATCHOP:
+ return 1;
+ case ALTOP:
+ return AltOp_fixedLength(r);
+ case CATOP:
+ return CatOp_fixedLength(r);
+ default:
+ return ~0u;
+ }
+ return ~0u;
+}
+
+void
+RegExp_calcSize(RegExp *re, Char *rep)
+{
+ Range *r;
+ unsigned int c;
+
+ switch (re->type) {
+ case NULLOP:
+ re->size = 0;
+ break;
+ case MATCHOP:
+ re->size = 1;
+ for(r = re->d.match; r; r = r->next)
+ for(c = r->lb; c < r->ub; ++c)
+ if(rep[c] == c)
+ ++re->size;
+ break;
+ case RULEOP:
+ RegExp_calcSize(re->d.RuleOp.exp, rep);
+ RegExp_calcSize(re->d.RuleOp.ctx, rep);
+ re->size = re->d.RuleOp.exp->size + re->d.RuleOp.ctx->size + 1;
+ break;
+ case ALTOP:
+ RegExp_calcSize(re->d.AltCatOp.exp1, rep);
+ RegExp_calcSize(re->d.AltCatOp.exp2, rep);
+ re->size = re->d.AltCatOp.exp1->size
+ + re->d.AltCatOp.exp2->size + 2;
+ break;
+ case CATOP:
+ RegExp_calcSize(re->d.AltCatOp.exp1, rep);
+ RegExp_calcSize(re->d.AltCatOp.exp2, rep);
+ re->size = re->d.AltCatOp.exp1->size + re->d.AltCatOp.exp2->size;
+ break;
+ case CLOSEOP:
+ RegExp_calcSize(re->d.exp, rep);
+ re->size = re->d.exp->size + 1;
+ break;
+ case CLOSEVOP:
+ RegExp_calcSize(re->d.CloseVOp.exp, rep);
+
+ if (re->d.CloseVOp.max >= 0)
+ re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) +
+ ((1 + re->d.CloseVOp.exp->size) *
+ (re->d.CloseVOp.max - re->d.CloseVOp.min));
+ else
+ re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) + 1;
+ break;
+ }
+}
+
+static void
+MatchOp_compile(RegExp *re, Char *rep, Ins *i)
+{
+ Ins *j;
+ unsigned int bump;
+ Range *r;
+ unsigned int c;
+
+ i->i.tag = CHAR;
+ i->i.link = &i[re->size];
+ j = &i[1];
+ bump = re->size;
+ for(r = re->d.match; r; r = r->next){
+ for(c = r->lb; c < r->ub; ++c){
+ if(rep[c] == c){
+ j->c.value = c;
+ j->c.bump = --bump;
+ j++;
+ }
+ }
+ }
+}
+
+static void
+AltOp_compile(RegExp *re, Char *rep, Ins *i){
+ Ins *j;
+
+ i->i.tag = FORK;
+ j = &i[re->d.AltCatOp.exp1->size + 1];
+ i->i.link = &j[1];
+ RegExp_compile(re->d.AltCatOp.exp1, rep, &i[1]);
+ j->i.tag = GOTO;
+ j->i.link = &j[re->d.AltCatOp.exp2->size + 1];
+ RegExp_compile(re->d.AltCatOp.exp2, rep, &j[1]);
+}
+
+void
+RegExp_compile(RegExp *re, Char *rep, Ins *i)
+{
+ Ins *jumppoint;
+ int st = 0;
+
+ switch (re->type) {
+ case NULLOP:
+ break;
+ case MATCHOP:
+ MatchOp_compile(re, rep, i);
+ break;
+ case RULEOP:
+ re->d.RuleOp.ins = i;
+ RegExp_compile(re->d.RuleOp.exp, rep, &i[0]);
+ i += re->d.RuleOp.exp->size;
+ RegExp_compile(re->d.RuleOp.ctx, rep, &i[0]);
+ i += re->d.RuleOp.ctx->size;
+ i->i.tag = TERM;
+ i->i.link = re;
+ break;
+ case ALTOP:
+ AltOp_compile(re, rep, i);
+ break;
+ case CATOP:
+ RegExp_compile(re->d.AltCatOp.exp1, rep, &i[0]);
+ RegExp_compile(re->d.AltCatOp.exp2, rep,
+ &i[re->d.AltCatOp.exp1->size]);
+ break;
+ case CLOSEOP:
+ RegExp_compile(re->d.exp, rep, &i[0]);
+ i += re->d.exp->size;
+ i->i.tag = FORK;
+ i->i.link = i - re->d.exp->size;
+ break;
+ case CLOSEVOP:
+ jumppoint = i + ((1 + re->d.CloseVOp.exp->size) *
+ (re->d.CloseVOp.max - re->d.CloseVOp.min));
+ for(st = re->d.CloseVOp.min; st < re->d.CloseVOp.max; st++) {
+ i->i.tag = FORK;
+ i->i.link = jumppoint;
+ i+=1;
+ RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]);
+ i += re->d.CloseVOp.exp->size;
+ }
+ for(st = 0; st < re->d.CloseVOp.min; st++) {
+ RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]);
+ i += re->d.CloseVOp.exp->size;
+ if(re->d.CloseVOp.max < 0 && st == 0) {
+ i->i.tag = FORK;
+ i->i.link = i - re->d.CloseVOp.exp->size;
+ i++;
+ }
+ }
+ break;
+ }
+}
+
+static void
+MatchOp_split(RegExp *re, CharSet *s)
+{
+ Range *r;
+ unsigned int c;
+
+ for(r = re->d.match; r; r = r->next){
+ for(c = r->lb; c < r->ub; ++c){
+ CharPtn *x = s->rep[c], *a = x->nxt;
+ if(!a){
+ if(x->card == 1)
+ continue;
+ x->nxt = a = s->freeHead;
+ if(!(s->freeHead = s->freeHead->nxt))
+ s->freeTail = &s->freeHead;
+ a->nxt = NULL;
+ x->fix = s->fix;
+ s->fix = x;
+ }
+ if(--(x->card) == 0){
+ *s->freeTail = x;
+ *(s->freeTail = &x->nxt) = NULL;
+ }
+ s->rep[c] = a;
+ ++(a->card);
+ }
+ }
+ for(; s->fix; s->fix = s->fix->fix)
+ if(s->fix->card)
+ s->fix->nxt = NULL;
+}
+
+void
+RegExp_split(RegExp *re, CharSet *s)
+{
+ switch (re->type) {
+ case NULLOP:
+ break;
+ case MATCHOP:
+ MatchOp_split(re, s);
+ break;
+ case RULEOP:
+ RegExp_split(re->d.RuleOp.exp, s);
+ RegExp_split(re->d.RuleOp.ctx, s);
+ break;
+ case ALTOP:
+ /* FALLTHROUGH */
+ case CATOP:
+ RegExp_split(re->d.AltCatOp.exp1, s);
+ RegExp_split(re->d.AltCatOp.exp2, s);
+ break;
+ case CLOSEOP:
+ RegExp_split(re->d.exp, s);
+ break;
+ case CLOSEVOP:
+ RegExp_split(re->d.CloseVOp.exp, s);
+ break;
+ }
+}
+
+void
+RegExp_display(RegExp *re, FILE *o)
+{
+ switch (re->type) {
+ case NULLOP:
+ fputc('_', o);
+ break;
+ case MATCHOP:
+ Range_out(o, re->d.match);
+ break;
+ case RULEOP:
+ RegExp_display(re->d.RuleOp.exp, o);
+ fputc('/', o);
+ RegExp_display(re->d.RuleOp.ctx, o);
+ fputc(';', o);
+ break;
+ case ALTOP:
+ RegExp_display(re->d.AltCatOp.exp1, o);
+ fputc('|', o);
+ RegExp_display(re->d.AltCatOp.exp2, o);
+ break;
+ case CATOP:
+ RegExp_display(re->d.AltCatOp.exp1, o);
+ RegExp_display(re->d.AltCatOp.exp2, o);
+ break;
+ case CLOSEOP:
+ RegExp_display(re->d.exp, o);
+ fputc('+', o);
+ break;
+ }
+}
+
+void
+Range_out(FILE *o, const Range *r)
+{
+ if(!r)
+ return;
+
+ if((r->ub - r->lb) == 1){
+ prtCh(o, r->lb);
+ } else {
+ prtCh(o, r->lb);
+ fputc('-', o);
+ prtCh(o, r->ub-1);
+ }
+ Range_out(o, r->next);
+}
+
+static Range *doUnion(Range *r1, Range *r2){
+ Range *r, **rP = &r;
+ for(;;){
+ Range *s;
+ if(r1->lb <= r2->lb){
+ s = Range_new_copy(r1);
+ } else {
+ s = Range_new_copy(r2);
+ }
+ *rP = s;
+ rP = &s->next;
+ for(;;){
+ if(r1->lb <= r2->lb){
+ if(r1->lb > s->ub)
+ break;
+ if(r1->ub > s->ub)
+ s->ub = r1->ub;
+ if(!(r1 = r1->next)){
+ unsigned int ub = 0;
+ for(; r2 && r2->lb <= s->ub; r2 = r2->next)
+ ub = r2->ub;
+ if(ub > s->ub)
+ s->ub = ub;
+ *rP = r2;
+ return r;
+ }
+ } else {
+ if(r2->lb > s->ub)
+ break;
+ if(r2->ub > s->ub)
+ s->ub = r2->ub;
+ if(!(r2 = r2->next)){
+ unsigned int ub = 0;
+ for(; r1 && r1->lb <= s->ub; r1 = r1->next)
+ ub = r1->ub;
+ if(ub > s->ub)
+ s->ub = ub;
+ *rP = r1;
+ return r;
+ }
+ }
+ }
+ }
+ *rP = NULL;
+ return r;
+}
+
+static Range *doDiff(Range *r1, Range *r2){
+ Range *r, *s, **rP = &r;
+ for(; r1; r1 = r1->next){
+ unsigned int lb = r1->lb;
+ for(; r2 && r2->ub <= r1->lb; r2 = r2->next);
+ for(; r2 && r2->lb < r1->ub; r2 = r2->next){
+ if(lb < r2->lb){
+ *rP = s = Range_new(lb, r2->lb);
+ rP = &s->next;
+ }
+ if((lb = r2->ub) >= r1->ub)
+ goto noMore;
+ }
+ *rP = s = Range_new(lb, r1->ub);
+ rP = &s->next;
+ noMore:;
+ }
+ *rP = NULL;
+ return r;
+}
+
+static RegExp *merge(RegExp *m1, RegExp *m2){
+ if(!m1)
+ return m2;
+ if(!m2)
+ return m1;
+ return RegExp_new_MatchOp(doUnion(m1->d.match, m2->d.match));
+}
+
+RegExp *mkDiff(RegExp *e1, RegExp *e2){
+ RegExp *m1, *m2;
+ Range *r;
+ if(!(m1 = RegExp_isA(e1, MATCHOP)))
+ return NULL;
+ if(!(m2 = RegExp_isA(e2, MATCHOP)))
+ return NULL;
+ r = doDiff(m1->d.match, m2->d.match);
+ return r? RegExp_new_MatchOp(r) : RegExp_new_NullOp();
+}
+
+static RegExp *doAlt(RegExp *e1, RegExp *e2){
+ if(!e1)
+ return e2;
+ if(!e2)
+ return e1;
+ return RegExp_new_AltOp(e1, e2);
+}
+
+RegExp *mkAlt(RegExp *e1, RegExp *e2){
+ RegExp *a;
+ RegExp *m1, *m2;
+ if((a = RegExp_isA(e1, ALTOP))){
+ if((m1 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP)))
+ e1 = a->d.AltCatOp.exp2;
+ } else if((m1 = RegExp_isA(e1, MATCHOP))){
+ e1 = NULL;
+ }
+ if((a = RegExp_isA(e2, ALTOP))){
+ if((m2 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP)))
+ e2 = a->d.AltCatOp.exp2;
+ } else if((m2 = RegExp_isA(e2, MATCHOP))){
+ e2 = NULL;
+ }
+ return doAlt(merge(m1, m2), doAlt(e1, e2));
+}
+
+static unsigned char unescape(SubStr *s){
+ unsigned char c;
+ unsigned char v;
+ s->len--;
+ if((c = *s->str++) != '\\' || s->len == 0)
+ return xlat[c];
+ s->len--;
+ switch(c = *s->str++){
+ case 'n':
+ return xlat['\n'];
+ case 't':
+ return xlat['\t'];
+ case 'v':
+ return xlat['\v'];
+ case 'b':
+ return xlat['\b'];
+ case 'r':
+ return xlat['\r'];
+ case 'f':
+ return xlat['\f'];
+ case 'a':
+ return xlat['\a'];
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': {
+ v = c - '0';
+ for(; s->len != 0 && '0' <= (c = *s->str) && c <= '7'; s->len--, s->str++)
+ v = v*8 + (c - '0');
+ return v;
+ } default:
+ return xlat[c];
+ }
+}
+
+static Range *getRange(SubStr *s){
+ unsigned char lb = unescape(s), ub;
+ if(s->len < 2 || *s->str != '-'){
+ ub = lb;
+ } else {
+ s->len--; s->str++;
+ ub = unescape(s);
+ if(ub < lb){
+ unsigned char tmp;
+ tmp = lb; lb = ub; ub = tmp;
+ }
+ }
+ return Range_new(lb, ub+1);
+}
+
+static RegExp *matchChar(unsigned int c){
+ return RegExp_new_MatchOp(Range_new(c, c+1));
+}
+
+RegExp *strToRE(SubStr s){
+ RegExp *re;
+ s.len -= 2; s.str += 1;
+ if(s.len == 0)
+ return RegExp_new_NullOp();
+ re = matchChar(unescape(&s));
+ while(s.len > 0)
+ re = RegExp_new_CatOp(re, matchChar(unescape(&s)));
+ return re;
+}
+
+RegExp *strToCaseInsensitiveRE(SubStr s){
+ unsigned char c;
+ RegExp *re, *reL, *reU;
+ s.len -= 2; s.str += 1;
+ if(s.len == 0)
+ return RegExp_new_NullOp();
+ c = unescape(&s);
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
+ reL = matchChar(tolower(c));
+ reU = matchChar(toupper(c));
+ re = mkAlt(reL, reU);
+ } else {
+ re = matchChar(c);
+ }
+ while(s.len > 0) {
+ c = unescape(&s);
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
+ reL = matchChar(tolower(c));
+ reU = matchChar(toupper(c));
+ re = RegExp_new_CatOp(re, mkAlt(reL, reU));
+ } else {
+ re = RegExp_new_CatOp(re, matchChar(c));
+ }
+ }
+ return re;
+}
+
+RegExp *ranToRE(SubStr s){
+ Range *r;
+ s.len -= 2; s.str += 1;
+ if(s.len == 0)
+ return RegExp_new_NullOp();
+ r = getRange(&s);
+ while(s.len > 0)
+ r = doUnion(r, getRange(&s));
+ return RegExp_new_MatchOp(r);
+}
+
+RegExp *invToRE(SubStr s)
+{
+ RegExp *any, *ran, *inv;
+ SubStr *ss;
+
+
+ s.len--;
+ s.str++;
+
+ ss = SubStr_new("[\\000-\\377]", strlen("[\\000-\\377]"));
+ any = ranToRE(*ss);
+ free(ss);
+ if (s.len <= 2)
+ return any;
+
+ ran = ranToRE(s);
+ inv = mkDiff(any, ran);
+
+ free(ran);
+ free(any);
+
+ return inv;
+}
+
+RegExp *mkDot()
+{
+ SubStr *ss = SubStr_new("[\\000-\\377]", strlen("[\\000-\\377]"));
+ RegExp * any = ranToRE(*ss);
+ RegExp * ran = matchChar('\n');
+ RegExp * inv = mkDiff(any, ran);
+
+ free(ss);
+ free(ran);
+ free(any);
+
+ return inv;
+}
+
+RegExp *
+RegExp_new_RuleOp(RegExp *e, RegExp *c, Token *t, unsigned int a)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = RULEOP;
+ r->d.RuleOp.exp = e;
+ r->d.RuleOp.ctx = c;
+ r->d.RuleOp.ins = NULL;
+ r->d.RuleOp.accept = a;
+ r->d.RuleOp.code = t;
+ return r;
+}
+
+static void optimize(Ins *i){
+ while(!isMarked(i)){
+ mark(i);
+ if(i->i.tag == CHAR){
+ i = (Ins*) i->i.link;
+ } else if(i->i.tag == GOTO || i->i.tag == FORK){
+ Ins *target = (Ins*) i->i.link;
+ optimize(target);
+ if(target->i.tag == GOTO)
+ i->i.link = target->i.link == target? i : target;
+ if(i->i.tag == FORK){
+ Ins *follow = (Ins*) &i[1];
+ optimize(follow);
+ if(follow->i.tag == GOTO && follow->i.link == follow){
+ i->i.tag = GOTO;
+ } else if(i->i.link == i){
+ i->i.tag = GOTO;
+ i->i.link = follow;
+ }
+ }
+ return;
+ } else {
+ ++i;
+ }
+ }
+}
+
+void genCode(FILE *o, RegExp *re){
+ CharSet cs;
+ unsigned int j;
+ Char rep[nChars];
+ Ins *ins, *eoi;
+ DFA *dfa;
+
+ memset(&cs, 0, sizeof(cs));
+ for(j = 0; j < nChars; ++j){
+ cs.rep[j] = &cs.ptn[0];
+ cs.ptn[j].nxt = &cs.ptn[j+1];
+ }
+ cs.freeHead = &cs.ptn[1];
+ *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL;
+ cs.ptn[0].card = nChars;
+ cs.ptn[0].nxt = NULL;
+ RegExp_split(re, &cs);
+/*
+ for(unsigned int k = 0; k < nChars;){
+ for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];);
+ printSpan(cerr, j, k);
+ cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl;
+ }
+*/
+ for(j = 0; j < nChars; ++j){
+ if(!cs.rep[j]->nxt)
+ cs.rep[j]->nxt = &cs.ptn[j];
+ rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]);
+ }
+
+ RegExp_calcSize(re, rep);
+ ins = malloc(sizeof(Ins)*(re->size+1));
+ memset(ins, 0, (re->size+1)*sizeof(Ins));
+ RegExp_compile(re, rep, ins);
+ eoi = &ins[re->size];
+ eoi->i.tag = GOTO;
+ eoi->i.link = eoi;
+
+ optimize(ins);
+ for(j = 0; j < re->size;){
+ unmark(&ins[j]);
+ if(ins[j].i.tag == CHAR){
+ j = (Ins*) ins[j].i.link - ins;
+ } else {
+ j++;
+ }
+ }
+
+ dfa = DFA_new(ins, re->size, 0, 256, rep);
+ DFA_emit(dfa, o);
+ DFA_delete(dfa);
+ free(ins);
+}
diff --git a/tools/re2c/basics.h b/tools/re2c/basics.h
new file mode 100644
index 0000000..1531e75
--- /dev/null
+++ b/tools/re2c/basics.h
@@ -0,0 +1,14 @@
+#ifndef re2c_basics_h
+#define re2c_basics_h
+
+#if defined(__GNUC__) && !defined(inline)
+#define inline __inline__
+#endif
+
+typedef unsigned char byte;
+typedef unsigned short word;
+typedef unsigned long dword;
+
+#define PACKAGE_VERSION "1.0.0"
+
+#endif
diff --git a/tools/re2c/bootstrap/scanner.c b/tools/re2c/bootstrap/scanner.c
new file mode 100644
index 0000000..fd0ca93
--- /dev/null
+++ b/tools/re2c/bootstrap/scanner.c
@@ -0,0 +1,748 @@
+/* Generated by re2c 0.9.1-C on Sun Oct 9 22:15:58 2005
+ */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include "tools/re2c/scanner.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/globals.h"
+#include "re2c-parser.h"
+
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define BSIZE 8192
+
+#define YYCTYPE unsigned char
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RETURN(i) {s->cur = cursor; return i;}
+
+static unsigned char *fill(Scanner*, unsigned char*);
+
+void
+Scanner_init(Scanner *s, FILE *i)
+{
+ s->in = i;
+ s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top =
+ s->eof = NULL;
+ s->tchar = s->tline = 0;
+ s->cline = 1;
+}
+
+static unsigned char *
+fill(Scanner *s, unsigned char *cursor)
+{ /* Refill the buffer from s->in; returns cursor rebased onto the (possibly moved) buffer. */
+ if(!s->eof){ /* nothing more is read once EOF has been recorded */
+ unsigned int cnt = s->tok - s->bot;
+ if(cnt){
+ memmove(s->bot, s->tok, s->lim - s->tok); /* same buffer: regions may overlap */
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){
+ unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE));
+ memcpy(buf, s->tok, s->lim - s->tok);
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ if (s->bot)
+ free(s->bot);
+ s->bot = buf;
+ }
+ if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){
+ s->eof = &s->lim[cnt]; *s->eof++ = '\0';
+ }
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+#line 79 "scanner.re"
+
+
+int
+Scanner_echo(Scanner *s, FILE *out)
+{
+ unsigned char *cursor = s->cur;
+ int ignore_eoc = 0;
+
+ /* Catch EOF */
+ if (s->eof && cursor == s->eof)
+ return 0;
+
+ s->tok = cursor;
+echo:
+
+#line 87 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 11) YYFILL(11);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych <= '\000') goto yy7;
+ if(yych == '\n') goto yy5;
+ goto yy9;
+ } else {
+ if(yych <= '*') goto yy4;
+ if(yych != '/') goto yy9;
+ goto yy2;
+ }
+yy2: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '*') goto yy12;
+ goto yy3;
+yy3:
+#line 117 "scanner.re"
+{ goto echo; }
+#line 112 "scanner.c"
+yy4: yych = *++YYCURSOR;
+ if(yych == '/') goto yy10;
+ goto yy3;
+yy5: yych = *++YYCURSOR;
+ goto yy6;
+yy6:
+#line 112 "scanner.re"
+{ fwrite(s->tok, 1, cursor - s->tok, out);
+ s->tok = s->pos = cursor; s->cline++; oline++;
+ goto echo; }
+#line 123 "scanner.c"
+yy7: yych = *++YYCURSOR;
+ goto yy8;
+yy8:
+#line 115 "scanner.re"
+{ fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */
+ if(cursor == s->eof) { RETURN(0); } }
+#line 130 "scanner.c"
+yy9: yych = *++YYCURSOR;
+ goto yy3;
+yy10: yych = *++YYCURSOR;
+ goto yy11;
+yy11:
+#line 103 "scanner.re"
+{
+ if (ignore_eoc) {
+ ignore_eoc = 0;
+ } else {
+ fwrite(s->tok, 1, cursor - s->tok, out);
+ }
+ s->tok = s->pos = cursor;
+ goto echo;
+ }
+#line 146 "scanner.c"
+yy12: yych = *++YYCURSOR;
+ if(yych == '!') goto yy14;
+ goto yy13;
+yy13: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy3;
+ }
+yy14: yych = *++YYCURSOR;
+ if(yych == 'm') goto yy15;
+ if(yych == 'r') goto yy16;
+ goto yy13;
+yy15: yych = *++YYCURSOR;
+ if(yych == 'a') goto yy21;
+ goto yy13;
+yy16: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy13;
+ goto yy17;
+yy17: yych = *++YYCURSOR;
+ if(yych != '2') goto yy13;
+ goto yy18;
+yy18: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy13;
+ goto yy19;
+yy19: yych = *++YYCURSOR;
+ goto yy20;
+yy20:
+#line 94 "scanner.re"
+{ fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
+ s->tok = cursor;
+ RETURN(1); }
+#line 177 "scanner.c"
+yy21: yych = *++YYCURSOR;
+ if(yych != 'x') goto yy13;
+ goto yy22;
+yy22: yych = *++YYCURSOR;
+ if(yych != ':') goto yy13;
+ goto yy23;
+yy23: yych = *++YYCURSOR;
+ if(yych != 'r') goto yy13;
+ goto yy24;
+yy24: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy13;
+ goto yy25;
+yy25: yych = *++YYCURSOR;
+ if(yych != '2') goto yy13;
+ goto yy26;
+yy26: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy13;
+ goto yy27;
+yy27: yych = *++YYCURSOR;
+ goto yy28;
+yy28:
+#line 97 "scanner.re"
+{
+ fprintf(out, "#define YYMAXFILL %u\n", maxFill);
+ s->tok = s->pos = cursor;
+ ignore_eoc = 1;
+ goto echo;
+ }
+#line 206 "scanner.c"
+}
+#line 118 "scanner.re"
+
+}
+
+
+int
+Scanner_scan(Scanner *s)
+{
+ unsigned char *cursor = s->cur;
+ unsigned int depth;
+
+scan:
+ s->tchar = cursor - s->pos;
+ s->tline = s->cline;
+ s->tok = cursor;
+
+#line 224 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy29;
+ ++YYCURSOR;
+yy29:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/'){
+ if(yych <= '"'){
+ if(yych <= '\n'){
+ if(yych <= '\b') goto yy53;
+ if(yych <= '\t') goto yy47;
+ goto yy49;
+ } else {
+ if(yych == ' ') goto yy47;
+ if(yych <= '!') goto yy53;
+ goto yy37;
+ }
+ } else {
+ if(yych <= '*'){
+ if(yych <= '&') goto yy53;
+ if(yych <= '\'') goto yy39;
+ if(yych <= ')') goto yy43;
+ goto yy35;
+ } else {
+ if(yych <= '+') goto yy44;
+ if(yych <= '-') goto yy53;
+ if(yych <= '.') goto yy51;
+ goto yy33;
+ }
+ }
+ } else {
+ if(yych <= '@'){
+ if(yych <= '<'){
+ if(yych == ';') goto yy43;
+ goto yy53;
+ } else {
+ if(yych <= '=') goto yy43;
+ if(yych == '?') goto yy44;
+ goto yy53;
+ }
+ } else {
+ if(yych <= '`'){
+ if(yych <= 'Z') goto yy45;
+ if(yych <= '[') goto yy41;
+ if(yych <= '\\') goto yy43;
+ goto yy53;
+ } else {
+ if(yych <= 'z') goto yy45;
+ if(yych <= '{') goto yy31;
+ if(yych <= '|') goto yy43;
+ goto yy53;
+ }
+ }
+ }
+yy31: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych <= '/') goto yy32;
+ if(yych <= '9') goto yy84;
+ goto yy32;
+yy32:
+#line 133 "scanner.re"
+{ depth = 1;
+ goto code;
+ }
+#line 291 "scanner.c"
+yy33: yych = *++YYCURSOR;
+ if(yych == '*') goto yy82;
+ goto yy34;
+yy34:
+#line 163 "scanner.re"
+{ RETURN(*s->tok); }
+#line 298 "scanner.c"
+yy35: yych = *++YYCURSOR;
+ if(yych == '/') goto yy80;
+ goto yy36;
+yy36:
+#line 165 "scanner.re"
+{ yylval.op = *s->tok;
+ RETURN(CLOSE); }
+#line 306 "scanner.c"
+yy37: yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy76;
+ goto yy38;
+yy38:
+#line 150 "scanner.re"
+{ Scanner_fatal(s, "unterminated string constant (missing \")"); }
+#line 314 "scanner.c"
+yy39: yyaccept = 2;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy71;
+ goto yy40;
+yy40:
+#line 151 "scanner.re"
+{ Scanner_fatal(s, "unterminated string constant (missing ')"); }
+#line 322 "scanner.c"
+yy41: yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy42;
+ if(yych == '^') goto yy62;
+ goto yy60;
+yy42:
+#line 161 "scanner.re"
+{ Scanner_fatal(s, "unterminated range (missing ])"); }
+#line 331 "scanner.c"
+yy43: yych = *++YYCURSOR;
+ goto yy34;
+yy44: yych = *++YYCURSOR;
+ goto yy36;
+yy45: yych = *++YYCURSOR;
+ goto yy58;
+yy46:
+#line 180 "scanner.re"
+{ SubStr substr;
+ s->cur = cursor;
+ substr = Scanner_token(s);
+ yylval.symbol = Symbol_find(&substr);
+ return ID; }
+#line 345 "scanner.c"
+yy47: yych = *++YYCURSOR;
+ goto yy56;
+yy48:
+#line 186 "scanner.re"
+{ goto scan; }
+#line 351 "scanner.c"
+yy49: yych = *++YYCURSOR;
+ goto yy50;
+yy50:
+#line 188 "scanner.re"
+{ if(cursor == s->eof) RETURN(0);
+ s->pos = cursor; s->cline++;
+ goto scan;
+ }
+#line 360 "scanner.c"
+yy51: yych = *++YYCURSOR;
+ goto yy52;
+yy52:
+#line 193 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = mkDot();
+ return RANGE;
+ }
+#line 369 "scanner.c"
+yy53: yych = *++YYCURSOR;
+ goto yy54;
+yy54:
+#line 198 "scanner.re"
+{ fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
+ goto scan;
+ }
+#line 377 "scanner.c"
+yy55: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy56;
+yy56: if(yych == '\t') goto yy55;
+ if(yych == ' ') goto yy55;
+ goto yy48;
+yy57: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy58;
+yy58: if(yych <= '@'){
+ if(yych <= '/') goto yy46;
+ if(yych <= '9') goto yy57;
+ goto yy46;
+ } else {
+ if(yych <= 'Z') goto yy57;
+ if(yych <= '`') goto yy46;
+ if(yych <= 'z') goto yy57;
+ goto yy46;
+ }
+yy59: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy60;
+yy60: if(yych <= '['){
+ if(yych != '\n') goto yy59;
+ goto yy61;
+ } else {
+ if(yych <= '\\') goto yy64;
+ if(yych <= ']') goto yy65;
+ goto yy59;
+ }
+yy61: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy32;
+ case 1: goto yy38;
+ case 2: goto yy40;
+ case 3: goto yy42;
+ }
+yy62: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy63;
+yy63: if(yych <= '['){
+ if(yych == '\n') goto yy61;
+ goto yy62;
+ } else {
+ if(yych <= '\\') goto yy67;
+ if(yych <= ']') goto yy68;
+ goto yy62;
+ }
+yy64: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy59;
+yy65: yych = *++YYCURSOR;
+ goto yy66;
+yy66:
+#line 157 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = ranToRE(Scanner_token(s));
+ return RANGE; }
+#line 442 "scanner.c"
+yy67: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy62;
+yy68: yych = *++YYCURSOR;
+ goto yy69;
+yy69:
+#line 153 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = invToRE(Scanner_token(s));
+ return RANGE; }
+#line 455 "scanner.c"
+yy70: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy71;
+yy71: if(yych <= '&'){
+ if(yych == '\n') goto yy61;
+ goto yy70;
+ } else {
+ if(yych <= '\'') goto yy73;
+ if(yych != '\\') goto yy70;
+ goto yy72;
+ }
+yy72: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy70;
+yy73: yych = *++YYCURSOR;
+ goto yy74;
+yy74:
+#line 146 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s));
+ return STRING; }
+#line 480 "scanner.c"
+yy75: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy76;
+yy76: if(yych <= '!'){
+ if(yych == '\n') goto yy61;
+ goto yy75;
+ } else {
+ if(yych <= '"') goto yy78;
+ if(yych != '\\') goto yy75;
+ goto yy77;
+ }
+yy77: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy75;
+yy78: yych = *++YYCURSOR;
+ goto yy79;
+yy79:
+#line 142 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = strToRE(Scanner_token(s));
+ return STRING; }
+#line 505 "scanner.c"
+yy80: yych = *++YYCURSOR;
+ goto yy81;
+yy81:
+#line 139 "scanner.re"
+{ s->tok = cursor;
+ RETURN(0); }
+#line 512 "scanner.c"
+yy82: yych = *++YYCURSOR;
+ goto yy83;
+yy83:
+#line 136 "scanner.re"
+{ depth = 1;
+ goto comment; }
+#line 519 "scanner.c"
+yy84: ++YYCURSOR;
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ goto yy85;
+yy85: if(yych <= '/'){
+ if(yych == ',') goto yy88;
+ goto yy61;
+ } else {
+ if(yych <= '9') goto yy84;
+ if(yych != '}') goto yy61;
+ goto yy86;
+ }
+yy86: yych = *++YYCURSOR;
+ goto yy87;
+yy87:
+#line 168 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = atoi((char *)s->tok+1);
+ RETURN(CLOSESIZE); }
+#line 539 "scanner.c"
+yy88: yych = *++YYCURSOR;
+ if(yych != '}') goto yy92;
+ goto yy89;
+yy89: yych = *++YYCURSOR;
+ goto yy90;
+yy90:
+#line 176 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = -1;
+ RETURN(CLOSESIZE); }
+#line 550 "scanner.c"
+yy91: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy92;
+yy92: if(yych <= '/') goto yy61;
+ if(yych <= '9') goto yy91;
+ if(yych != '}') goto yy61;
+ goto yy93;
+yy93: yych = *++YYCURSOR;
+ goto yy94;
+yy94:
+#line 172 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1));
+ RETURN(CLOSESIZE); }
+#line 566 "scanner.c"
+}
+#line 201 "scanner.re"
+
+
+code:
+
+#line 573 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy95;
+ ++YYCURSOR;
+yy95:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '&'){
+ if(yych <= '\n'){
+ if(yych <= '\t') goto yy103;
+ goto yy101;
+ } else {
+ if(yych == '"') goto yy105;
+ goto yy103;
+ }
+ } else {
+ if(yych <= '{'){
+ if(yych <= '\'') goto yy106;
+ if(yych <= 'z') goto yy103;
+ goto yy99;
+ } else {
+ if(yych != '}') goto yy103;
+ goto yy97;
+ }
+ }
+yy97: yych = *++YYCURSOR;
+ goto yy98;
+yy98:
+#line 205 "scanner.re"
+{ if(--depth == 0){
+ s->cur = cursor;
+ yylval.token = Token_new(Scanner_token(s), s->tline);
+ return CODE;
+ }
+ goto code; }
+#line 610 "scanner.c"
+yy99: yych = *++YYCURSOR;
+ goto yy100;
+yy100:
+#line 211 "scanner.re"
+{ ++depth;
+ goto code; }
+#line 617 "scanner.c"
+yy101: yych = *++YYCURSOR;
+ goto yy102;
+yy102:
+#line 213 "scanner.re"
+{ if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
+ s->pos = cursor; s->cline++;
+ goto code;
+ }
+#line 626 "scanner.c"
+yy103: yych = *++YYCURSOR;
+ goto yy104;
+yy104:
+#line 217 "scanner.re"
+{ goto code; }
+#line 632 "scanner.c"
+yy105: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy104;
+ goto yy112;
+yy106: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy104;
+ goto yy108;
+yy107: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy108;
+yy108: if(yych <= '&'){
+ if(yych != '\n') goto yy107;
+ goto yy109;
+ } else {
+ if(yych <= '\'') goto yy103;
+ if(yych == '\\') goto yy110;
+ goto yy107;
+ }
+yy109: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy104;
+ }
+yy110: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy109;
+ goto yy107;
+yy111: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy112;
+yy112: if(yych <= '!'){
+ if(yych == '\n') goto yy109;
+ goto yy111;
+ } else {
+ if(yych <= '"') goto yy103;
+ if(yych != '\\') goto yy111;
+ goto yy113;
+ }
+yy113: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy109;
+ goto yy111;
+}
+#line 218 "scanner.re"
+
+
+comment:
+
+#line 685 "scanner.c"
+{
+ YYCTYPE yych;
+ goto yy114;
+ ++YYCURSOR;
+yy114:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych == '\n') goto yy119;
+ goto yy121;
+ } else {
+ if(yych <= '*') goto yy116;
+ if(yych == '/') goto yy118;
+ goto yy121;
+ }
+yy116: yych = *++YYCURSOR;
+ if(yych == '/') goto yy124;
+ goto yy117;
+yy117:
+#line 232 "scanner.re"
+{ goto comment; }
+#line 707 "scanner.c"
+yy118: yych = *++YYCURSOR;
+ if(yych == '*') goto yy122;
+ goto yy117;
+yy119: yych = *++YYCURSOR;
+ goto yy120;
+yy120:
+#line 228 "scanner.re"
+{ if(cursor == s->eof) RETURN(0);
+ s->tok = s->pos = cursor; s->cline++;
+ goto comment;
+ }
+#line 719 "scanner.c"
+yy121: yych = *++YYCURSOR;
+ goto yy117;
+yy122: yych = *++YYCURSOR;
+ goto yy123;
+yy123:
+#line 226 "scanner.re"
+{ ++depth;
+ goto comment; }
+#line 728 "scanner.c"
+yy124: yych = *++YYCURSOR;
+ goto yy125;
+yy125:
+#line 222 "scanner.re"
+{ if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+#line 737 "scanner.c"
+}
+#line 233 "scanner.re"
+
+}
+
+void
+Scanner_fatal(Scanner *s, const char *msg)
+{
+ fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg);
+ exit(1);
+}
diff --git a/tools/re2c/code.c b/tools/re2c/code.c
new file mode 100644
index 0000000..bd54baa
--- /dev/null
+++ b/tools/re2c/code.c
@@ -0,0 +1,969 @@
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "tools/re2c/substr.h"
+#include "tools/re2c/globals.h"
+#include "tools/re2c/dfa.h"
+#include "tools/re2c/parse.h"
+
+#ifdef _WIN32
+/* tmpfile() replacement for Windows.
+ *
+ * On Windows tmpfile() creates the file in the root directory. This
+ * may fail due to insufficient privileges.
+ */
+static FILE *
+win32_tmpfile (void) /* returns a "w+b" stream on a file in the temp directory, or NULL on any failure */
+{
+ DWORD path_len;
+ WCHAR path_name[MAX_PATH + 1];
+ WCHAR file_name[MAX_PATH + 1];
+ HANDLE handle;
+ int fd;
+ FILE *fp;
+
+ path_len = GetTempPathW (MAX_PATH, path_name);
+ if (path_len <= 0 || path_len >= MAX_PATH) /* reject failure and paths that do not fit */
+ return NULL;
+
+ if (GetTempFileNameW (path_name, L"ps_", 0, file_name) == 0)
+ return NULL;
+
+ handle = CreateFileW (file_name,
+ GENERIC_READ | GENERIC_WRITE,
+ 0,
+ NULL,
+ CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, /* opened with the delete-on-close flag */
+ NULL);
+ if (handle == INVALID_HANDLE_VALUE) {
+ DeleteFileW (file_name); /* remove the name reserved by GetTempFileNameW */
+ return NULL;
+ }
+
+ fd = _open_osfhandle((intptr_t) handle, 0); /* wrap the Win32 handle in a CRT file descriptor */
+ if (fd < 0) {
+ CloseHandle (handle);
+ return NULL;
+ }
+
+ fp = fdopen(fd, "w+b");
+ if (fp == NULL) {
+ _close(fd);
+ return NULL;
+ }
+
+ return fp;
+}
+#endif
+
+/* Mark label `value` as referenced, growing the vUsedLabels table as needed. */
+static void useLabel(size_t value) {
+    /* Double the table until `value` is a valid index. */
+    for (; value >= vUsedLabelAlloc; vUsedLabelAlloc *= 2) {
+        vUsedLabels = realloc(vUsedLabels, vUsedLabelAlloc * 2);
+        if (vUsedLabels == NULL) {
+            fputs("Out of memory.\n", stderr);
+            exit(EXIT_FAILURE);
+        }
+        /* the newly added upper half starts out unmarked */
+        memset(vUsedLabels + vUsedLabelAlloc, 0, vUsedLabelAlloc);
+    }
+    vUsedLabels[value] = 1;
+}
+
+/* there must be at least one span in list; all spans must cover
+ * same range
+ */
+
+/* Merge runs of adjacent spans that share a target, in place, so that
+ * neighbouring spans always have different targets afterwards.
+ */
+void Go_compact(Go *g){
+    Span *spans = g->span;
+    unsigned int kept = 0;      /* index of the last span written */
+    unsigned int cur;
+
+    for(cur = 1; cur < g->nSpans; ++cur){
+        if(spans[cur].to != spans[kept].to){
+            ++kept;
+            spans[kept].to = spans[cur].to;
+        }
+        /* either way the kept span now extends to the current upper bound */
+        spans[kept].ub = spans[cur].ub;
+    }
+    g->nSpans = kept + 1;
+}
+
+void Go_unmap(Go *g, Go *base, State *x){ /* build g from base with the spans that target x folded away; g->span must already hold room for base->nSpans entries */
+ Span *s = g->span, *b = base->span, *e = &b[base->nSpans];
+ unsigned int lb = 0; /* lower bound of the output span currently being built */
+ s->ub = 0;
+ s->to = NULL;
+ for(; b != e; ++b){
+ if(b->to == x){
+ if((s->ub - lb) > 1) /* only absorb the x-span when the current output span is wider than one character */
+ s->ub = b->ub;
+ } else {
+ if(b->to != s->to){ /* target changes: start a new output span */
+ if(s->ub){
+ lb = s->ub; ++s;
+ }
+ s->to = b->to;
+ }
+ s->ub = b->ub;
+ }
+ }
+ s->ub = e[-1].ub; ++s; /* last output span always reaches the end of base's range */
+ g->nSpans = s - g->span;
+}
+
+/* OR bit mask m into bm[c] for every character c covered by a span of g
+ * whose target is s.
+ */
+static void doGen(Go *g, State *s, unsigned char *bm, unsigned char m){
+    Span *sp = g->span;
+    Span *end = sp + g->nSpans;
+    unsigned int c = 0;         /* first character of the current span */
+
+    while(sp < end){
+        if(sp->to == s){
+            while(c < sp->ub){
+                bm[c] |= m;
+                ++c;
+            }
+        }
+        c = sp->ub;
+        ++sp;
+    }
+}
+#if 0 /* disabled debugging helper; its only caller in this file, BitMap_stats, is also under #if 0 */
+static void prt(FILE *o, Go *g, State *s){ /* print every span of g whose target is s */
+ Span *b = g->span, *e = &b[g->nSpans];
+ unsigned int lb = 0; /* lower bound of the current span = previous span's ub */
+ for(; b < e; ++b){
+ if(b->to == s)
+ printSpan(o, lb, b->ub);
+ lb = b->ub;
+ }
+}
+#endif
+static int matches(Go *g1, State *s1, Go *g2, State *s2){ /* 1 if the spans of g1 targeting s1 cover the same ranges as the spans of g2 targeting s2, else 0 */
+ Span *b1 = g1->span, *e1 = &b1[g1->nSpans];
+ unsigned int lb1 = 0;
+ Span *b2 = g2->span, *e2 = &b2[g2->nSpans];
+ unsigned int lb2 = 0;
+ for(;;){
+ for(; b1 < e1 && b1->to != s1; ++b1) lb1 = b1->ub; /* skip to the next span of g1 that targets s1 */
+ for(; b2 < e2 && b2->to != s2; ++b2) lb2 = b2->ub; /* likewise for g2 / s2 */
+ if(b1 == e1) return b2 == e2; /* both lists must run out together */
+ if(b2 == e2) return 0;
+ if(lb1 != lb2 || b1->ub != b2->ub) return 0; /* ranges differ */
+ ++b1; ++b2; /* NOTE(review): lb1/lb2 are not advanced here, only in the skip loops above */
+ }
+}
+
+typedef struct BitMap { /* one registered (transition list, target state) pair; nodes are chained from BitMap_first */
+ Go *go; /* transition list the bitmap is derived from */
+ State *on; /* target state whose spans set the bit */
+ struct BitMap *next; /* next node in the list */
+ unsigned int i; /* offset into yybm[]; assigned by BitMap_gen */
+ unsigned char m; /* bit mask within a yybm[] entry; assigned by BitMap_gen */
+} BitMap;
+
+static BitMap *BitMap_find_go(Go*, State*);
+static BitMap *BitMap_find(State*);
+static void BitMap_gen(FILE *, unsigned int, unsigned int);
+/* static void BitMap_stats(void);*/
+static BitMap *BitMap_new(Go*, State*);
+
+static BitMap *BitMap_first = NULL;
+
+/* Allocate a bitmap record for (g, x) and push it on the front of the
+ * BitMap_first list.  The table offset `i` and mask `m` start at 0 and
+ * are assigned later by BitMap_gen.  Exits on allocation failure, in the
+ * same way useLabel does.
+ */
+BitMap *
+BitMap_new(Go *g, State *x)
+{
+    BitMap *b = malloc(sizeof *b);
+    if (!b) {
+        fputs("Out of memory.\n", stderr);
+        exit(EXIT_FAILURE);
+    }
+    b->go = g;
+    b->on = x;
+    b->i = 0;
+    b->m = 0;
+    b->next = BitMap_first;
+    BitMap_first = b;
+    return b;
+}
+
+/* Return the registered bitmap whose (go, on) pair matches (g, x),
+ * registering a new one when none matches.
+ */
+BitMap *
+BitMap_find_go(Go *g, State *x){
+    BitMap *cur = BitMap_first;
+
+    while(cur != NULL && !matches(cur->go, cur->on, g, x))
+        cur = cur->next;
+    if(cur != NULL)
+        return cur;
+    return BitMap_new(g, x);
+}
+
+/* Return the first registered bitmap whose target state is x, or NULL. */
+BitMap *
+BitMap_find(State *x){
+    BitMap *cur = BitMap_first;
+
+    while(cur != NULL && cur->on != x)
+        cur = cur->next;
+    return cur;
+}
+
+void BitMap_gen(FILE *o, unsigned int lb, unsigned int ub){ /* emit the yybm[] table for all registered bitmaps; writes nothing when none are registered */
+ BitMap *b = BitMap_first;
+ if(b){
+ unsigned int n = ub - lb; /* table entries per group of bitmaps */
+ unsigned int i;
+ unsigned char *bm = malloc(sizeof(unsigned char)*n); /* NOTE(review): result is not checked for NULL */
+ fputs("\tstatic unsigned char yybm[] = {", o);
+ for(i = 0; b; i += n){ /* each pass packs up to 8 bitmaps into one n-entry slice at offset i */
+ unsigned char m;
+ unsigned int j;
+ memset(bm, 0, n);
+ for(m = 0x80; b && m; b = b->next, m >>= 1){ /* one bit per bitmap, highest bit first */
+ b->i = i; b->m = m; /* recorded for the yybm[i+yych] & m test written by Go_genGoto */
+ doGen(b->go, b->on, bm-lb, m); /* NOTE(review): bm-lb points before the buffer when lb > 0 — confirm lb is always 0 */
+ }
+ for(j = 0; j < n; ++j){
+ if(j%8 == 0) {fputs("\n\t", o); oline++;} /* eight values per output line */
+ fprintf(o, "%3u, ", (unsigned int) bm[j]);
+ }
+ }
+ fputs("\n\t};\n", o); oline+=2;
+ free(bm);
+ }
+}
+
+#if 0 /* disabled debugging aid */
+void BitMap_stats(void){ /* dump every registered bitmap to stderr, print the count, then reset the list */
+ unsigned int n = 0;
+ BitMap *b;
+ for(b = BitMap_first; b; b = b->next){
+ prt(stderr, b->go, b->on); fputs("\n", stderr);
+ ++n;
+ }
+ fprintf(stderr, "%u bitmaps\n", n);
+ BitMap_first = NULL; /* nodes are not freed here */
+}
+#endif
+
+static void genGoTo(FILE *o, State *from, State *to, int *readCh, /* write "<indent>goto yy<label>;" and mark the label as used */
+ const char *indent)
+{
+#if 0 /* disabled: deferred read of yych; from and readCh are only used in here */
+ if (*readCh && from->label + 1 != to->label)
+ {
+ fputs("%syych = *YYCURSOR;\n", indent, o); oline++;
+ *readCh = 0;
+ }
+#endif
+ fprintf(o, "%sgoto yy%u;\n", indent, to->label); oline++;
+ useLabel(to->label); /* so State_emit prints the yyN: label for the target */
+}
+
+static void genIf(FILE *o, const char *cmp, unsigned int v, int *readCh) /* write "\tif(yych <cmp> '<v>')" with no trailing newline */
+{
+#if 0 /* disabled variant that would fold the character read into the condition */
+ if (*readCh)
+ {
+ fputs("\tif((yych = *YYCURSOR) ", o);
+ *readCh = 0;
+ } else {
+#endif
+ fputs("\tif(yych ", o);
+#if 0
+ }
+#endif
+ fprintf(o, "%s '", cmp);
+ prtCh(o, v); /* v is printed as an escaped character literal */
+ fputs("')", o);
+}
+
+/* Write i tab characters to o. */
+static void indent(FILE *o, unsigned int i){
+    unsigned int k;
+
+    for(k = 0; k < i; ++k)
+        fputc('\t', o);
+}
+
+static void need(FILE *o, unsigned int n, int *readCh) /* emit the YYFILL(n) check followed by "yych = *YYCURSOR;" */
+{
+ unsigned int fillIndex;
+ int hasFillIndex = (0<=vFillIndexes); /* non-negative vFillIndexes turns on the YYSETSTATE / yyFillLabel output */
+ if (hasFillIndex) {
+ fillIndex = vFillIndexes++;
+ fprintf(o, "\tYYSETSTATE(%u);\n", fillIndex);
+ ++oline;
+ }
+
+ if(n == 1) { /* single-character case uses an equality test */
+ fputs("\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n", o); oline++;
+ } else {
+ fprintf(o, "\tif((YYLIMIT - YYCURSOR) < %u) YYFILL(%u);\n", n, n);
+ oline++;
+ }
+
+ if (hasFillIndex) {
+ fprintf(o, "yyFillLabel%u:\n", fillIndex); /* target of the matching case written by DFA_emit */
+ ++oline;
+ }
+
+ fputs("\tyych = *YYCURSOR;\n", o); oline++;
+ *readCh = 0;
+}
+
+void
+Action_emit(Action *a, FILE *o, int *readCh) /* write the code for action a to o, keeping the global output line counter oline in step */
+{
+ int first = 1; /* ACCEPTACT: true until the switch header has been written */
+ unsigned int i;
+ unsigned int back;
+
+ switch (a->type) {
+ case MATCHACT: /* advance the cursor and load the next character */
+ if(a->state->link){
+ fputs("\t++YYCURSOR;\n", o);
+ need(o, a->state->depth, readCh); /* states with a link get a YYFILL check */
+#if 0
+ } else if (!Action_readAhead(a)) {
+ /* do not read next char if match */
+ fputs("\t++YYCURSOR;\n", o);
+ *readCh = 1;
+#endif
+ } else {
+ fputs("\tyych = *++YYCURSOR;\n", o);
+ *readCh = 0;
+ }
+ oline++;
+ break;
+ case ENTERACT: /* like MATCHACT, plus the entry label yy<label>: */
+ if(a->state->link){
+ fputs("\t++YYCURSOR;\n", o);
+ fprintf(o, "yy%u:\n", a->d.label); oline+=2;
+ need(o, a->state->depth, readCh);
+ } else {
+ /* we shouldn't need 'rule-following' protection here */
+ fputs("\tyych = *++YYCURSOR;\n", o);
+ fprintf(o, "yy%u:\n", a->d.label); oline+=2;
+ *readCh = 0;
+ }
+ break;
+ case SAVEMATCHACT: /* record the backtracking point in YYMARKER (and yyaccept when in use) */
+ if (bUsedYYAccept) {
+ fprintf(o, "\tyyaccept = %u;\n", a->d.selector);
+ oline++;
+ }
+ if(a->state->link){
+ fputs("\tYYMARKER = ++YYCURSOR;\n", o); oline++;
+ need(o, a->state->depth, readCh);
+ } else {
+ fputs("\tyych = *(YYMARKER = ++YYCURSOR);\n", o); oline++;
+ *readCh = 0;
+ }
+ break;
+ case MOVEACT: /* emits nothing */
+ break;
+ case ACCEPTACT: /* restore YYCURSOR from YYMARKER and dispatch on yyaccept */
+ for(i = 0; i < a->d.Accept.nRules; ++i)
+ if(a->d.Accept.saves[i] != ~0u){ /* ~0u marks a rule with no save slot */
+ if(first){
+ first = 0;
+ bUsedYYAccept = 1;
+ fputs("\tYYCURSOR = YYMARKER;\n", o);
+ fputs("\tswitch(yyaccept){\n", o); oline+=2;
+ }
+ fprintf(o, "\tcase %u:", a->d.Accept.saves[i]);
+ genGoTo(o, a->state, a->d.Accept.rules[i], readCh, "\t");
+ }
+ if(!first) {
+ fputs("\t}\n", o); oline++;
+ }
+ break;
+ case RULEACT: /* write the user's rule code, bracketed by line directives */
+ back = RegExp_fixedLength(a->d.rule->d.RuleOp.ctx);
+ if(back != ~0u && back > 0u) /* rewind by the context length when RegExp_fixedLength reports one */
+ fprintf(o, "\tYYCURSOR -= %u;", back);
+ fprintf(o, "\n"); oline++;
+ line_source(o, a->d.rule->d.RuleOp.code->line);
+ SubStr_out(&a->d.rule->d.RuleOp.code->text, o);
+ fprintf(o, "\n"); oline++;
+ if (!iFlag)
+ fprintf(o, "#line %u \"%s\"\n", oline++, outputFileName); /* switch back to output-file line numbering */
+ break;
+ }
+}
+
+Action *
+Action_new_Accept(State *x, unsigned int n, unsigned int *s, State **r) /* create an ACCEPTACT action and attach it to state x */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = ACCEPTACT;
+ a->state = x;
+ a->d.Accept.nRules = n;
+ a->d.Accept.saves = s; /* pointer stored as-is; the array is not copied */
+ a->d.Accept.rules = r; /* pointer stored as-is; the array is not copied */
+ x->action = a; /* any previous x->action is overwritten, not freed */
+ return a;
+}
+
+static void doLinear(FILE *o, unsigned int i, Span *s, unsigned int n, /* emit an if/goto chain for the n spans at s, indented by i tabs */
+ State *from, State *next, int *readCh){
+ for(;;){
+ State *bg = s[0].to; /* target of the current lowest span */
+ while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){ /* s[1] is a single character with the same target on both sides */
+ if(s[1].to == next && n == 3){
+ indent(o, i);
+ genIf(o, "!=", s[0].ub, readCh);
+ genGoTo(o, from, bg, readCh, "\t");
+ indent(o, i);
+ genGoTo(o, from, next, readCh, "\t");
+ return;
+ } else {
+ indent(o, i);
+ genIf(o, "==", s[0].ub, readCh);
+ genGoTo(o, from, s[1].to, readCh, "\t");
+ }
+ n -= 2; s += 2;
+ }
+ if(n == 1){ /* only one span left: unconditional goto */
+ indent(o, i);
+ genGoTo(o, from, s[0].to, readCh, "\t");
+ return;
+ } else if(n == 2 && bg == next){
+ indent(o, i);
+ genIf(o, ">=", s[0].ub, readCh);
+ genGoTo(o, from, s[1].to, readCh, "\t");
+ indent(o, i);
+ genGoTo(o, from, next, readCh, "\t");
+ return;
+ } else { /* peel off the lowest span with an upper-bound test */
+ indent(o, i);
+ genIf(o, "<=", s[0].ub - 1, readCh);
+ genGoTo(o, from, bg, readCh, "\t");
+ n -= 1; s += 1;
+ }
+ }
+ indent(o, i); /* unreachable: the for(;;) above only leaves via return */
+ genGoTo(o, from, next, readCh, "\t");
+}
+
+void
+Go_genLinear(Go *g, FILE *o, State *from, State *next, int *readCh){ /* emit all of g's spans as a linear if/goto chain at indent level 0 */
+ doLinear(o, 0, g->span, g->nSpans, from, next, readCh);
+}
+
+/* Write one "case 'c':" label for each character in [lb, s->ub).  Labels
+ * are separated by newlines; the last one is left unterminated so the
+ * caller can append the goto on the same line.
+ */
+static void genCases(FILE *o, unsigned int lb, Span *s){
+    unsigned int c;
+
+    for(c = lb; c < s->ub; ++c){
+        if(c != lb){
+            fputs("\n", o); oline++;
+        }
+        fputs("\tcase '", o); prtCh(o, c); fputs("':", o);
+    }
+}
+
+void
+Go_genSwitch(Go *g, FILE *o, State *from, State *next, int *readCh){ /* emit g as a switch(yych) statement; lists of two or fewer spans go to Go_genLinear */
+ if(g->nSpans <= 2){
+ Go_genLinear(g, o, from, next, readCh);
+ } else {
+ State *def = g->span[g->nSpans-1].to; /* the last span's target becomes the default: branch */
+ Span **sP = malloc(sizeof(Span*)*(g->nSpans-1)), **r, **s, **t; /* worklist of spans not targeting def; the last span never qualifies, hence nSpans-1 */
+ unsigned int i;
+
+ t = &sP[0];
+ for(i = 0; i < g->nSpans; ++i)
+ if(g->span[i].to != def)
+ *(t++) = &g->span[i];
+
+ if (dFlag)
+ fputs("\tYYDEBUG(-1, yych);\n", o);
+
+#if 0
+ if (*readCh) {
+ fputs("\tswitch((yych = *YYCURSOR)) {\n", o);
+ *readCh = 0;
+ } else
+#endif
+ fputs("\tswitch(yych){\n", o);
+ oline++;
+ while(t != &sP[0]){ /* each pass emits every case that shares the first remaining span's target */
+ State *to;
+ r = s = &sP[0];
+ if(*s == &g->span[0])
+ genCases(o, 0, *s); /* the first span starts at character 0 */
+ else
+ genCases(o, (*s)[-1].ub, *s); /* otherwise it starts at the previous span's ub */
+ to = (*s)->to;
+ while(++s < t){
+ if((*s)->to == to)
+ genCases(o, (*s)[-1].ub, *s);
+ else
+ *(r++) = *s; /* keep spans with other targets for a later pass */
+ }
+ genGoTo(o, from, to, readCh, "\t");
+ t = r;
+ }
+ fputs("\tdefault:", o);
+ genGoTo(o, from, def, readCh, "\t");
+ fputs("\t}\n", o); oline++;
+
+ free(sP);
+ }
+}
+
+static void doBinary(FILE *o, unsigned int i, Span *s, unsigned int n, /* emit nested if/else that halves the span list until 4 or fewer spans remain */
+ State *from, State *next, int *readCh){
+ if(n <= 4){
+ doLinear(o, i, s, n, from, next, readCh);
+ } else {
+ unsigned int h = n/2; /* number of spans in the lower half */
+ indent(o, i);
+ genIf(o, "<=", s[h-1].ub - 1, readCh); /* test against the last character of the lower half */
+ fputs("{\n", o); oline++;
+ doBinary(o, i+1, &s[0], h, from, next, readCh);
+ indent(o, i); fputs("\t} else {\n", o); oline++;
+ doBinary(o, i+1, &s[h], n - h, from, next, readCh);
+ indent(o, i); fputs("\t}\n", o); oline++;
+ }
+}
+
+void
+Go_genBinary(Go *g, FILE *o, State *from, State *next, int *readCh){ /* emit all of g's spans as a binary decision tree at indent level 0 */
+ doBinary(o, 0, g->span, g->nSpans, from, next, readCh);
+}
+
+void
+Go_genBase(Go *g, FILE *o, State *from, State *next, int *readCh){ /* choose between switch, binary and linear output for g */
+ if(g->nSpans == 0)
+ return;
+ if(!sFlag){ /* without sFlag a switch is always used */
+ Go_genSwitch(g, o, from, next, readCh);
+ return;
+ }
+ if(g->nSpans > 8){
+ Span *bot = &g->span[0], *top = &g->span[g->nSpans-1];
+ unsigned int util; /* average characters per span, ignoring the widest end span(s) */
+ if(bot[0].to == top[0].to){
+ util = (top[-1].ub - bot[0].ub)/(g->nSpans - 2);
+ } else {
+ if(bot[0].ub > (top[0].ub - top[-1].ub)){
+ util = (top[0].ub - bot[0].ub)/(g->nSpans - 1);
+ } else {
+ util = top[-1].ub/(g->nSpans - 1);
+ }
+ }
+ if(util <= 2){ /* mostly narrow spans: use a switch */
+ Go_genSwitch(g, o, from, next, readCh);
+ return;
+ }
+ }
+ if(g->nSpans > 5){
+ Go_genBinary(g, o, from, next, readCh);
+ } else {
+ Go_genLinear(g, o, from, next, readCh);
+ }
+}
+
+void
+Go_genGoto(Go *g, FILE *o, State *from, State *next, int *readCh){ /* emit the transition code for g, using a yybm[] bitmap test first when bFlag allows it */
+ unsigned int i;
+ if(bFlag){
+ for(i = 0; i < g->nSpans; ++i){
+ State *to = g->span[i].to;
+ if(to && to->isBase){
+ BitMap *b = BitMap_find(to);
+ if(b && matches(b->go, b->on, g, to)){ /* the registered bitmap covers exactly g's spans to this state */
+ Go go;
+ go.span = malloc(sizeof(Span)*g->nSpans); /* scratch list; Go_unmap writes at most g->nSpans spans */
+ Go_unmap(&go, g, to);
+ fprintf(o, "\tif(yybm[%u+", b->i);
+#if 0
+ if (*readCh)
+ fputs("(yych = *YYCURSOR)", o);
+ else
+#endif
+ fputs("yych", o);
+ fprintf(o, "] & %u) {\n", (unsigned int) b->m); oline++;
+ genGoTo(o, from, to, readCh, "\t\t");
+ fputs("\t}\n", o); oline++;
+ Go_genBase(&go, o, from, next, readCh); /* remaining transitions, with the bitmap target folded away */
+ free(go.span);
+ return;
+ }
+ }
+ }
+ }
+ Go_genBase(g, o, from, next, readCh); /* no usable bitmap: plain code */
+}
+
+void State_emit(State *s, FILE *o, int *readCh){ /* write the state's label (if used), the optional debug hook, then its action */
+ if (vUsedLabels[s->label]) /* the label is printed only when useLabel() marked it */
+ fprintf(o, "yy%u:", s->label);
+ if (dFlag)
+ fprintf(o, "\n\tYYDEBUG(%u, *YYCURSOR);\n", s->label);
+ Action_emit(s->action, o, readCh);
+}
+
+static unsigned int merge(Span *x0, State *fg, State *bg){ /* write into x0 the span list of fg with every range where fg and bg agree redirected to bg; returns the span count */
+ Span *x = x0, *f = fg->go.span, *b = bg->go.span;
+ unsigned int nf = fg->go.nSpans, nb = bg->go.nSpans;
+ State *prev = NULL, *to; /* prev = target of the most recently written span */
+ /* NB: we assume both spans are for same range */
+ for(;;){
+ if(f->ub == b->ub){ /* both lists end a span at the same character */
+ to = f->to == b->to? bg : f->to;
+ if(to == prev){
+ --x; /* same target as the previous output span: extend it instead of adding one */
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = f->ub;
+ ++x; ++f; --nf; ++b; --nb;
+ if(nf == 0 && nb == 0) /* the only exit: both lists used up together */
+ return x - x0;
+ }
+ while(f->ub < b->ub){ /* fg's span ends first */
+ to = f->to == b->to? bg : f->to;
+ if(to == prev){
+ --x;
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = f->ub;
+ ++x; ++f; --nf;
+ }
+ while(b->ub < f->ub){ /* bg's span ends first */
+ to = b->to == f->to? bg : f->to;
+ if(to == prev){
+ --x;
+ } else {
+ x->to = prev = to;
+ }
+ x->ub = b->ub;
+ ++x; ++b; --nb;
+ }
+ }
+}
+
+const unsigned int cInfinity = ~0;
+
+typedef struct SCC { /* state stack used by SCC_traverse */
+ State **top, **stk; /* stk = base of the allocated array, top = next free slot */
+} SCC;
+
+static void SCC_init(SCC*, unsigned int);
+static SCC *SCC_new(unsigned int);
+static void SCC_destroy(SCC*);
+static void SCC_delete(SCC*);
+static void SCC_traverse(SCC*, State*);
+
+/* Give *s an empty stack with room for `size` state pointers. */
+static void
+SCC_init(SCC *s, unsigned int size)
+{
+    State **stack = malloc(size * sizeof(State*));
+
+    s->stk = stack;
+    s->top = stack;
+}
+
+/* Heap-allocate an SCC together with an empty stack for `size` state
+ * pointers; release it with SCC_delete.
+ */
+static SCC *
+SCC_new(unsigned int size){
+    SCC *scc = malloc(sizeof(SCC));
+    State **stack = malloc(size * sizeof(State*));
+
+    scc->stk = stack;
+    scc->top = stack;
+    return scc;
+}
+
+static void
+SCC_destroy(SCC *s){ /* release the stack of an SCC set up with SCC_init; *s itself is not freed */
+ free(s->stk);
+}
+
+static void
+SCC_delete(SCC *s){ /* release an SCC obtained from SCC_new: its stack, then the struct */
+ free(s->stk);
+ free(s);
+}
+
+static void SCC_traverse(SCC *s, State *x){ /* depth-first walk from x that groups states into strongly connected components, recording each component's root in ->link */
+ unsigned int k, i;
+
+ *s->top = x; /* push x */
+ k = ++s->top - s->stk; /* k = 1-based stack position of x */
+ x->depth = k;
+ for(i = 0; i < x->go.nSpans; ++i){
+ State *y = x->go.span[i].to;
+ if(y){
+ if(y->depth == 0) /* depth 0 means not visited yet */
+ SCC_traverse(s, y);
+ if(y->depth < x->depth) /* keep the smallest depth reachable from x */
+ x->depth = y->depth;
+ }
+ }
+ if(x->depth == k) /* nothing earlier on the stack is reachable: x is the root of a component */
+ do {
+ (*--s->top)->depth = cInfinity; /* pop; cInfinity marks the state as finished */
+ (*s->top)->link = x;
+ } while(*s->top != x);
+}
+
+/* Return the length of the longest transition path out of s, where a
+ * successor that has a link set counts as a single step.  Depths of
+ * link-less successors are computed on demand and cached in ->depth
+ * (cInfinity means "not computed yet").
+ */
+static unsigned int maxDist(State *s){
+    unsigned int best = 0;
+    unsigned int k;
+
+    for(k = 0; k < s->go.nSpans; ++k){
+        State *succ = s->go.span[k].to;
+        unsigned int dist;
+
+        if(succ == NULL)
+            continue;
+        dist = 1;
+        if(succ->link == NULL){
+            if(succ->depth == cInfinity)
+                succ->depth = maxDist(succ);
+            dist += succ->depth;
+        }
+        if(dist > best)
+            best = dist;
+    }
+    return best;
+}
+
+static void calcDepth(State *head){ /* clear the link of component roots that have no successor inside their own component; recompute depth for every other state */
+ State *t, *s;
+ for(s = head; s; s = s->next){
+ if(s->link == s){ /* s is the root of its component */
+ unsigned int i;
+ for(i = 0; i < s->go.nSpans; ++i){
+ t = s->go.span[i].to;
+ if(t && t->link == s) /* a successor (possibly s itself) belongs to the same component */
+ goto inSCC;
+ }
+ s->link = NULL; /* no such successor: drop the link, depth is left unchanged */
+ } else {
+ inSCC: /* also entered by the goto above */
+ s->depth = maxDist(s);
+ }
+ }
+}
+
+void DFA_findSCCs(DFA *d){ /* set ->link and ->depth on every state of d via SCC_traverse and calcDepth */
+ SCC scc;
+ State *s;
+
+ SCC_init(&scc, d->nStates); /* stack sized for one entry per state */
+ for(s = d->head; s; s = s->next){
+ s->depth = 0; /* 0 = not visited yet */
+ s->link = NULL;
+ }
+
+ for(s = d->head; s; s = s->next)
+ if(!s->depth)
+ SCC_traverse(&scc, s);
+
+ calcDepth(d->head);
+
+ SCC_destroy(&scc);
+}
+
+void DFA_split(DFA *d, State *s){ /* insert a new MOVEACT state after s that takes over s's rule and transitions; s keeps a single span leading to it */
+ State *move = State_new();
+ Action_new_Move(move);
+ DFA_addState(d, &s->next, move); /* linked in directly after s */
+ move->link = s->link;
+ move->rule = s->rule;
+ move->go = s->go; /* struct copy: the span array now belongs to move */
+ s->rule = NULL;
+ s->go.nSpans = 1;
+ s->go.span = malloc(sizeof(Span)); /* NOTE(review): result is not checked for NULL */
+ s->go.span[0].ub = d->ubChar; /* one span up to the DFA's upper character bound */
+ s->go.span[0].to = move;
+}
+
+/* Write the C scanner code for DFA d to stream o.
+ *
+ * Steps, in order: compute SCCs and fill depths, mark backtracking
+ * (save) points, insert rule/accept states, split self-looping "base"
+ * states, merge transitions against base states, optionally emit the
+ * yybm[] bitmap table, assign labels, and finally emit every state.
+ *
+ * The states are emitted twice: first into a temporary file whose
+ * contents are discarded (that pass only exists for its side effects,
+ * i.e. the label-usage marks set by useLabel() and bUsedYYAccept), then
+ * into o.  oline and vFillIndexes are restored between the two passes.
+ *
+ * Exits the process with an error message if -f style fill labels are in
+ * use and this is not the first DFA emitted, or if the temporary file
+ * cannot be created.
+ */
+void DFA_emit(DFA *d, FILE *o){
+    static unsigned int label = 0;
+    State *s;
+    unsigned int i, bitmap_brace = 0;
+    unsigned int nRules = 0;
+    unsigned int nSaves = 0;
+    unsigned int *saves;
+    unsigned int nOrgOline;
+    State **rules;
+    State *accept = NULL;
+    Span *span;
+    FILE *tmpo;
+    int hasFillLabels;
+    int maxFillIndexes, orgVFillIndexes;
+    unsigned int start_label;
+
+    hasFillLabels = (0<=vFillIndexes);
+    if (hasFillLabels && label!=0) {
+        fputs("re2c : error : multiple /*!re2c blocks aren't supported when -f is specified\n", stderr);
+        exit(1);
+    }
+
+    DFA_findSCCs(d);
+    d->head->link = d->head;
+
+    maxFill = 1;
+    for(s = d->head; s; s = s->next) {
+        s->depth = maxDist(s);
+        if (maxFill < s->depth)
+            maxFill = s->depth;
+        if(s->rule && s->rule->d.RuleOp.accept >= nRules)
+            nRules = s->rule->d.RuleOp.accept + 1;
+    }
+
+    /* saves[r] == ~0u means rule r has no save slot yet */
+    saves = malloc(sizeof(unsigned int)*nRules);
+    memset(saves, ~0, (nRules)*sizeof(unsigned int));
+
+    /* mark backtracking points */
+    for(s = d->head; s; s = s->next){
+        if(s->rule){
+            for(i = 0; i < s->go.nSpans; ++i)
+                if(s->go.span[i].to && !s->go.span[i].to->rule){
+                    free(s->action);
+                    if(saves[s->rule->d.RuleOp.accept] == ~0u)
+                        saves[s->rule->d.RuleOp.accept] = nSaves++;
+                    Action_new_Save(s, saves[s->rule->d.RuleOp.accept]);
+                    continue;
+                }
+        }
+    }
+
+    /* insert actions */
+    rules = malloc(sizeof(State*)*nRules);
+    memset(rules, 0, (nRules)*sizeof(State*));
+    for(s = d->head; s; s = s->next){
+        State *ow;
+        if(!s->rule){
+            ow = accept;
+        } else {
+            if(!rules[s->rule->d.RuleOp.accept]){
+                State *n = State_new();
+                Action_new_Rule(n, s->rule);
+                rules[s->rule->d.RuleOp.accept] = n;
+                DFA_addState(d, &s->next, n);
+            }
+            ow = rules[s->rule->d.RuleOp.accept];
+        }
+        /* transitions with no target fall through to the rule/accept state */
+        for(i = 0; i < s->go.nSpans; ++i)
+            if(!s->go.span[i].to){
+                if(!ow){
+                    ow = accept = State_new();
+                    Action_new_Accept(accept, nRules, saves, rules);
+                    DFA_addState(d, &s->next, accept);
+                }
+                s->go.span[i].to = ow;
+            }
+    }
+
+    /* split ``base'' states into two parts */
+    for(s = d->head; s; s = s->next){
+        s->isBase = 0;
+        if(s->link){
+            for(i = 0; i < s->go.nSpans; ++i){
+                if(s->go.span[i].to == s){
+                    s->isBase = 1;
+                    DFA_split(d, s);
+                    if(bFlag)
+                        BitMap_find_go(&s->next->go, s);
+                    s = s->next;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* find ``base'' state, if possible */
+    span = malloc(sizeof(Span)*(d->ubChar - d->lbChar));
+    for(s = d->head; s; s = s->next){
+        if(!s->link){
+            for(i = 0; i < s->go.nSpans; ++i){
+                State *to = s->go.span[i].to;
+                if(to && to->isBase){
+                    unsigned int nSpans;
+                    to = to->go.span[0].to;
+                    nSpans = merge(span, s, to);
+                    if(nSpans < s->go.nSpans){
+                        free(s->go.span);
+                        s->go.nSpans = nSpans;
+                        s->go.span = malloc(sizeof(Span)*nSpans);
+                        memcpy(s->go.span, span, nSpans*sizeof(Span));
+                    }
+                    break;
+                }
+            }
+        }
+    }
+    free(span);
+
+    free(d->head->action);
+
+    if(bFlag) {
+        fputs("{\n", o);
+        oline++;
+        bitmap_brace = 1;
+        BitMap_gen(o, d->lbChar, d->ubChar);
+    }
+
+    bUsedYYAccept = 0;
+
+    start_label = label;
+
+    Action_new_Enter(d->head, label++);
+
+    for(s = d->head; s; s = s->next)
+        s->label = label++;
+
+    /* Dry run into a scratch file; oline and vFillIndexes are restored after it. */
+    nOrgOline = oline;
+    maxFillIndexes = vFillIndexes;
+    orgVFillIndexes = vFillIndexes;
+#ifdef _WIN32
+    tmpo = win32_tmpfile();
+#else
+    tmpo = tmpfile();
+#endif
+    if (!tmpo) {
+        /* both tmpfile() and win32_tmpfile() report failure with NULL */
+        fputs("re2c : error : unable to create temporary file\n", stderr);
+        exit(1);
+    }
+    for(s = d->head; s; s = s->next){
+        int readCh = 0;
+        State_emit(s, tmpo, &readCh);
+        Go_genGoto(&s->go, tmpo, s, s->next, &readCh);
+    }
+    fclose(tmpo);
+    maxFillIndexes = vFillIndexes;
+    vFillIndexes = orgVFillIndexes;
+    oline = nOrgOline;
+
+    fputs("\n", o);
+    oline++;
+    if (!iFlag)
+        fprintf(o, "#line %u \"%s\"\n", oline++, outputFileName);
+
+    if (!hasFillLabels) {
+        fputs("{\n\tYYCTYPE yych;\n", o);
+        oline += 2;
+        if (bUsedYYAccept) {
+            fputs("\tunsigned int yyaccept;\n", o);
+            oline++;
+        }
+    } else {
+        fputs("{\n\n", o);
+        oline += 2;
+    }
+
+    if (!hasFillLabels) {
+        fprintf(o, "\tgoto yy%u;\n", start_label);
+        oline++;
+        useLabel(label);
+    } else {
+        int i;
+        fputs("\tswitch(YYGETSTATE()) {\n", o);
+        fputs("\t\tcase -1: goto yy0;\n", o);
+
+        for (i=0; i<maxFillIndexes; ++i)
+            fprintf(o, "\t\tcase %u: goto yyFillLabel%u;\n", i, i);
+
+        fputs("\t\tdefault: /* abort() */;\n", o);
+        fputs("\t}\n", o);
+        fputs("yyNext:\n", o);
+
+        oline += maxFillIndexes;
+        oline += 5;
+    }
+
+    /* the real pass */
+    for(s = d->head; s; s = s->next){
+        int readCh = 0;
+        State_emit(s, o, &readCh);
+        Go_genGoto(&s->go, o, s, s->next, &readCh);
+    }
+    fputs("}\n", o); oline++;
+    if (bitmap_brace) {
+        fputs("}\n", o);
+        oline++;
+    }
+
+    BitMap_first = NULL;
+
+    free(saves);
+    free(rules);
+}
diff --git a/tools/re2c/dfa.c b/tools/re2c/dfa.c
new file mode 100644
index 0000000..16509de
--- /dev/null
+++ b/tools/re2c/dfa.c
@@ -0,0 +1,253 @@
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "tools/re2c/globals.h"
+#include "tools/re2c/substr.h"
+#include "tools/re2c/dfa.h"
+
+#define octCh(c) ('0' + c%8)
+
+void prtCh(FILE *o, unsigned char c){ /* write c to o in a form that can sit inside a C character literal */
+ unsigned char oc = talx[c]; /* c mapped through the talx table */
+ switch(oc){
+ case '\'': fputs("\\'", o); break;
+ case '\n': fputs("\\n", o); break;
+ case '\t': fputs("\\t", o); break;
+ case '\v': fputs("\\v", o); break;
+ case '\b': fputs("\\b", o); break;
+ case '\r': fputs("\\r", o); break;
+ case '\f': fputs("\\f", o); break;
+ case '\a': fputs("\\a", o); break;
+ case '\\': fputs("\\\\", o); break;
+ default:
+ if(isprint(oc))
+ fputc(oc, o);
+ else
+ fprintf(o, "\\%c%c%c", octCh(c/64), octCh(c/8), octCh(c)); /* three-digit octal escape; built from c, not the translated oc */
+ }
+}
+
+/* Print the half-open range [lb, ub) as "[c]" or "[c-d]"; an inverted
+ * range (lb > ub) is flagged with a leading '*'.
+ */
+void printSpan(FILE *o, unsigned int lb, unsigned int ub){
+    if(lb > ub)
+        fputc('*', o);
+    fputc('[', o);
+    prtCh(o, lb);
+    if((ub - lb) != 1){
+        /* more than one character: add the inclusive upper end */
+        fputc('-', o);
+        prtCh(o, ub-1);
+    }
+    fputc(']', o);
+}
+
+unsigned int
+Span_show(Span *s, FILE *o, unsigned int lb) /* print span [lb, s->ub) and its target label when it has a target; returns s->ub */
+{
+ if(s->to){
+ printSpan(o, lb, s->ub);
+ fprintf(o, " %u; ", s->to->label);
+ }
+ return s->ub; /* the caller uses this as the next span's lower bound */
+}
+
+void
+State_out(FILE *o, const State *s){ /* debug dump of one state: label, accepted rule if any, then its spans */
+ unsigned int lb, i;
+ fprintf(o, "state %u", s->label);
+ if(s->rule)
+ fprintf(o, " accepts %u", s->rule->d.RuleOp.accept);
+ fputs("\n", o); oline++;
+ lb = 0;
+ for(i = 0; i < s->go.nSpans; ++i)
+ lb = Span_show(&s->go.span[i], o, lb); /* each span starts where the previous one ended */
+}
+
+void
+DFA_out(FILE *o, const DFA *dfa){ /* debug dump of every state in list order */
+ State *s;
+ for(s = dfa->head; s; s = s->next) {
+ State_out(o, s);
+ fputs("\n\n", o); oline+=2;
+ }
+}
+
+State *
+State_new(void) /* allocate a state with every field zeroed or NULL */
+{
+ State *s = malloc(sizeof(State)); /* NOTE(review): result is not checked for NULL */
+ s->label = 0;
+ s->rule = NULL;
+ s->next = NULL;
+ s->link = NULL;
+ s->depth = 0;
+ s->kCount = 0;
+ s->kernel = NULL;
+ s->isBase = 0;
+ s->action = NULL;
+ s->go.nSpans = 0;
+ s->go.span = NULL;
+ return s;
+}
+
+/* Free a state together with the storage it owns: its action, its kernel
+ * copy and its span array.  Other states reached through next/link/spans
+ * are not touched, and neither are the saves/rules arrays an ACCEPTACT
+ * action points at (DFA_emit frees those itself).
+ */
+void
+State_delete(State *s)
+{
+    /* free(NULL) is a no-op, so unset members need no guard */
+    free(s->action);
+    free(s->kernel);
+    free(s->go.span);
+    free(s);
+}
+
+static Ins **closure(Ins **cP, Ins *i){ /* append i and every unmarked instruction reachable from it through FORK/GOTO at cP, marking each; returns the new end pointer */
+ while(!isMarked(i)){ /* marks stop an instruction being added twice */
+ mark(i);
+ *(cP++) = i;
+ if(i->i.tag == FORK){
+ cP = closure(cP, i + 1); /* first branch: the following instruction */
+ i = (Ins*) i->i.link; /* second branch: the link target */
+ } else if(i->i.tag == GOTO){
+ i = (Ins*) i->i.link;
+ } else
+ break;
+ }
+ return cP;
+}
+
+typedef struct GoTo { /* scratch table entry used by DFA_new */
+ Char ch; /* goTo[k].ch for k < nGoTos: a character that has a transition */
+ void *to; /* goTo[c - lb].to: first an Ins chain, later the target State */
+} GoTo;
+
+DFA *
+DFA_new(Ins *ins, unsigned int ni, unsigned int lb, unsigned int ub, Char *rep) /* build a DFA over characters [lb, ub) from the instruction array ins */
+{
+ DFA *d = malloc(sizeof(DFA));
+ Ins **work = malloc(sizeof(Ins*)*(ni+1)); /* scratch kernel; +1 for the NULL terminator DFA_findState writes */
+ unsigned int nc = ub - lb; /* number of characters */
+ GoTo *goTo = malloc(sizeof(GoTo)*nc);
+ Span *span = malloc(sizeof(Span)*nc); /* a state can have at most nc spans */
+
+ d->lbChar = lb;
+ d->ubChar = ub;
+ memset((char*) goTo, 0, nc*sizeof(GoTo));
+ d->tail = &d->head;
+ d->head = NULL;
+ d->nStates = 0;
+ d->toDo = NULL;
+ DFA_findState(d, work, closure(work, &ins[0]) - work); /* start state = closure of the first instruction */
+ while(d->toDo){ /* handle each state still on the to-do list */
+ State *s = d->toDo;
+
+ Ins **cP, **iP, *i;
+ unsigned int nGoTos = 0;
+ unsigned int j;
+
+ d->toDo = s->link; /* the to-do list is chained through ->link */
+ s->rule = NULL;
+ for(iP = s->kernel; (i = *iP); ++iP){
+ if(i->i.tag == CHAR){
+ Ins *j2;
+ for(j2 = i + 1; j2 < (Ins*) i->i.link; ++j2){ /* the entries between a CHAR instruction and its link */
+ if(!(j2->c.link = goTo[j2->c.value - lb].to)) /* chain j2 in front of earlier entries for this character */
+ goTo[nGoTos++].ch = j2->c.value; /* first time this character is seen for s */
+ goTo[j2->c.value - lb].to = j2;
+ }
+ } else if(i->i.tag == TERM){
+ if(!s->rule || ((RegExp *)i->i.link)->d.RuleOp.accept < s->rule->d.RuleOp.accept) /* keep the rule with the lowest accept number */
+ s->rule = (RegExp *)i->i.link;
+ }
+ }
+
+ for(j = 0; j < nGoTos; ++j){ /* turn each character's Ins chain into a target state */
+ GoTo *go = &goTo[goTo[j].ch - lb];
+ i = (Ins*) go->to;
+ for(cP = work; i; i = (Ins*) i->c.link)
+ cP = closure(cP, i + i->c.bump);
+ go->to = DFA_findState(d, work, cP - work);
+ }
+
+ s->go.nSpans = 0;
+ for(j = 0; j < nc;){ /* group runs of characters with the same target into spans */
+ State *to = (State*) goTo[rep[j]].to;
+ while(++j < nc && goTo[rep[j]].to == to);
+ span[s->go.nSpans].ub = lb + j;
+ span[s->go.nSpans].to = to;
+ s->go.nSpans++;
+ }
+
+ for(j = nGoTos; j-- > 0;) /* reset the touched entries for the next state */
+ goTo[goTo[j].ch - lb].to = NULL;
+
+ s->go.span = malloc(sizeof(Span)*s->go.nSpans);
+ memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
+
+ Action_new_Match(s); /* every state starts with a MATCHACT action */
+
+ }
+ free(work);
+ free(goTo);
+ free(span);
+
+ return d;
+}
+
+void
+DFA_delete(DFA *d){ /* free every state on d's list; NOTE(review): d itself is not freed here although DFA_new mallocs it — confirm callers free it */
+ State *s;
+ while((s = d->head)){
+ d->head = s->next; /* unlink before freeing */
+ State_delete(s);
+ }
+}
+
+void DFA_addState(DFA *d, State **a, State *s){ /* give s the next label and link it into d's list at the slot *a */
+ s->label = d->nStates++;
+ s->next = *a;
+ *a = s;
+ if(a == d->tail) /* added at the end: move the tail pointer */
+ d->tail = &s->next;
+}
+
+State *DFA_findState(DFA *d, Ins **kernel, unsigned int kCount){ /* return the state of d whose kernel matches kernel[0..kCount), creating and queueing a new one if none does */
+ Ins **cP, **iP, *i;
+ State *s;
+
+ kernel[kCount] = NULL; /* NULL-terminate; the caller's array needs room for kCount+1 entries */
+
+ cP = kernel;
+ for(iP = kernel; (i = *iP); ++iP){ /* compact in place: keep only CHAR and TERM instructions */
+ if(i->i.tag == CHAR || i->i.tag == TERM){
+ *cP++ = i;
+ } else {
+ unmark(i); /* dropped instructions lose their mark now */
+ }
+ }
+ kCount = cP - kernel;
+ kernel[kCount] = NULL;
+
+ for(s = d->head; s; s = s->next){
+ if(s->kCount == kCount){ /* same size, and ... */
+ for(iP = s->kernel; (i = *iP); ++iP)
+ if(!isMarked(i)) /* ... every instruction of s is still marked, i.e. also in kernel */
+ goto nextState;
+ goto unmarkAll; /* match found: s is returned below */
+ }
+ nextState:;
+ }
+
+ s = State_new();
+ DFA_addState(d, d->tail, s); /* new states are added at the end of the list */
+ s->kCount = kCount;
+ s->kernel = malloc(sizeof(Ins*)*(kCount+1));
+ memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*)); /* copy includes the NULL terminator */
+ s->link = d->toDo; /* push on the to-do list handled by DFA_new */
+ d->toDo = s;
+
+unmarkAll:
+ for(iP = kernel; (i = *iP); ++iP)
+ unmark(i);
+
+ return s;
+}
diff --git a/tools/re2c/dfa.h b/tools/re2c/dfa.h
new file mode 100644
index 0000000..da4d673
--- /dev/null
+++ b/tools/re2c/dfa.h
@@ -0,0 +1,173 @@
+#ifndef re2c_dfa_h
+#define re2c_dfa_h
+
+#include <stdio.h>
+#include "tools/re2c/re.h"
+
+extern void prtCh(FILE *, unsigned char);
+extern void printSpan(FILE *, unsigned int, unsigned int);
+
+struct DFA;
+struct State;
+
+typedef enum { /* kind tag stored in Action.type; Action_emit switches on it */
+ MATCHACT = 1, /* set by Action_new_Match */
+ ENTERACT, /* set by Action_new_Enter */
+ SAVEMATCHACT, /* set by Action_new_Save */
+ MOVEACT, /* set by Action_new_Move */
+ ACCEPTACT, /* set by Action_new_Accept */
+ RULEACT /* set by Action_new_Rule */
+} ActionType;
+
+typedef struct Action { /* what to emit for a state; the valid member of d depends on type */
+ struct State *state; /* state this action is attached to */
+ ActionType type;
+ union {
+ /* data for Enter */
+ unsigned int label;
+ /* data for SaveMatch */
+ unsigned int selector;
+ /* data for Accept */
+ struct {
+ unsigned int nRules; /* length of saves[] and rules[] */
+ unsigned int *saves; /* per-rule yyaccept value; ~0u entries are skipped by Action_emit */
+ struct State **rules; /* per-rule goto target */
+ } Accept;
+ /* data for Rule */
+ RegExp *rule; /* RuleOp */
+ } d;
+} Action;
+
+void Action_emit(Action*, FILE *, int *);
+
+typedef struct Span { /* one range of characters sharing a transition target */
+ unsigned int ub; /* exclusive upper bound; the lower bound is the previous span's ub */
+ struct State *to; /* target state; may be NULL */
+} Span;
+
+unsigned int Span_show(Span*, FILE *, unsigned int);
+
+typedef struct Go { /* transition list of a state */
+ unsigned int nSpans; /* number of entries in span[] */
+ Span *span; /* spans in ascending ub order */
+} Go;
+
+typedef struct State { /* one DFA state */
+ unsigned int label; /* number used for the emitted yyN label */
+ RegExp *rule; /* RuleOp */
+ struct State *next; /* next state in the DFA's list */
+ struct State *link; /* to-do chain in DFA_new; SCC root after DFA_findSCCs */
+ unsigned int depth; /* for finding SCCs */
+ unsigned int kCount; /* number of kernel entries, excluding the NULL terminator */
+ Ins **kernel; /* NULL-terminated instruction list identifying the state */
+ unsigned int isBase:1; /* set by DFA_emit for states that transition to themselves */
+ Go go; /* outgoing transitions */
+ Action *action; /* action emitted for this state */
+} State;
+
+void Go_genGoto(Go*, FILE *, State*, State*, int*);
+void Go_genBase(Go*, FILE *, State*, State*, int*);
+void Go_genLinear(Go*, FILE *, State*, State*, int*);
+void Go_genBinary(Go*, FILE *, State*, State*, int*);
+void Go_genSwitch(Go*, FILE *, State*, State*, int*);
+void Go_compact(Go*);
+void Go_unmap(Go*, Go*, State*);
+
+State *State_new(void);
+void State_delete(State*);
+void State_emit(State*, FILE *, int *);
+void State_out(FILE *, const State*);
+
+typedef struct DFA {
+ unsigned int lbChar; /* lowest character value handled */
+ unsigned int ubChar; /* exclusive upper character bound */
+ unsigned int nStates; /* states added so far; also the next label DFA_addState hands out */
+ State *head, **tail; /* state list and the slot where the next state is appended */
+ State *toDo; /* states whose transitions DFA_new has not built yet */
+} DFA;
+
+DFA *DFA_new(Ins*, unsigned int, unsigned int, unsigned int, Char*);
+void DFA_delete(DFA*);
+void DFA_addState(DFA*, State**, State*);
+State *DFA_findState(DFA*, Ins**, unsigned int);
+void DFA_split(DFA*, State*);
+
+void DFA_findSCCs(DFA*);
+void DFA_emit(DFA*, FILE *);
+void DFA_out(FILE *, const DFA*);
+
+static Action *
+Action_new_Match(State *s) /* create a MATCHACT action and attach it to s */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = MATCHACT;
+ a->state = s;
+ s->action = a; /* any previous s->action is overwritten, not freed */
+ return a;
+}
+
+static Action *
+Action_new_Enter(State *s, unsigned int l) /* create an ENTERACT action carrying entry label l and attach it to s */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = ENTERACT;
+ a->state = s;
+ a->d.label = l;
+ s->action = a; /* any previous s->action is overwritten, not freed */
+ return a;
+}
+
+static Action *
+Action_new_Save(State *s, unsigned int i) /* create a SAVEMATCHACT action with yyaccept selector i and attach it to s */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = SAVEMATCHACT;
+ a->state = s;
+ a->d.selector = i;
+ s->action = a; /* any previous s->action is overwritten, not freed */
+ return a;
+}
+
+static Action *
+Action_new_Move(State *s) /* create a MOVEACT action (for which Action_emit writes nothing) and attach it to s */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = MOVEACT;
+ a->state = s;
+ s->action = a; /* any previous s->action is overwritten, not freed */
+ return a;
+}
+
+Action *Action_new_Accept(State*, unsigned int, unsigned int*, State**);
+
+static Action *
+Action_new_Rule(State *s, RegExp *r) /* RuleOp */ /* create a RULEACT action for rule r and attach it to s */
+{
+ Action *a = malloc(sizeof(Action)); /* NOTE(review): result is not checked for NULL */
+ a->type = RULEACT;
+ a->state = s;
+ a->d.rule = r;
+ s->action = a; /* any previous s->action is overwritten, not freed */
+ return a;
+}
+
+static int
+Action_isRule(Action *a) /* non-zero when a is a RULEACT action */
+{
+ return a->type == RULEACT;
+}
+
+static int
+Action_isMatch(Action *a) /* non-zero when a is a MATCHACT action */
+{
+ return a->type == MATCHACT;
+}
+
+static int
+Action_readAhead(Action *a) /* non-zero unless a is a MATCHACT whose state is missing, last in the list, or followed by a RULEACT state */
+{
+ return !Action_isMatch(a) ||
+ (a->state && a->state->next && !Action_isRule(a->state->next->action)); /* in code.c its only use is inside an #if 0 block of Action_emit */
+}
+
+#endif
diff --git a/tools/re2c/doc/loplas.ps.gz b/tools/re2c/doc/loplas.ps.gz
new file mode 100644
index 0000000..d1a9191
--- /dev/null
+++ b/tools/re2c/doc/loplas.ps.gz
Binary files differ
diff --git a/tools/re2c/doc/sample.bib b/tools/re2c/doc/sample.bib
new file mode 100644
index 0000000..1f34ab1
--- /dev/null
+++ b/tools/re2c/doc/sample.bib
@@ -0,0 +1,48 @@
+@Article{Bumbulis94,
+ author = {Peter Bumbulis and Donald D. Cowan},
+ title = {RE2C -- A More Versatile Scanner Generator},
+ journal = "ACM Letters on Programming Languages and Systems",
+ volume = 2,
+ number = "1--4",
+ year = 1994,
+ abstract = {
+ It is usually claimed that lexical analysis routines are still coded by
+ hand, despite the widespread availability of scanner generators, for
+ efficiency reasons. While efficiency is a consideration, there exist
+ freely available scanner generators such as GLA \cite{Gray88} that can
+ generate scanners that are faster than most hand-coded ones. However,
+ most generated scanners are tailored for a particular environment, and
+ retargetting these scanners to other environments, if possible, is
+ usually complex enough to make a hand-coded scanner more appealing. In
+ this paper we describe RE2C, a scanner generator that not only generates
+ scanners which are faster (and usually smaller) than those produced by
+ any other scanner generator known to the authors, including GLA, but
+ also adapt easily to any environment.
+ }
+}
+@Article{Gray88,
+ author = {Robert W. Gray},
+ title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That
+ Programmers Can Use},
+ journal = {USENIX Conference Proceedings},
+ year = {1988},
+ month = {June},
+ pages = {147-160},
+ abstract = {Writing an efficient lexical analyzer for even a simple
+ language is not a trivial task, and should not be done by hand. We
+ describe GLA, a tool that generates very efficient scanners. These
+ scanners do not use the conventional transition matrix, but instead
+ use a few 128 element vectors. Scanning time is only slightly
+ greater than the absolute minimum --- the time it takes to look at
+ each character in a file. The GLA language allows simple, concise
+ specification of scanners. Augmenting regular expressions with
+ auxiliary scanners easily handles nasty problems such as C comments
+ and C literal constants. We formalize the connection between token
+ scanning and token processing by associating a processor with
+ appropriate patterns. A library of canned descriptions simplifies the
+ specification of commonly used language pieces --- such as,
+ C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
+ tuned lexical analysis support modules are provided for error
+ handling, input buffering, storing identifiers in hash tables and
+ manipulating denotations.}
+}
diff --git a/tools/re2c/examples/basemmap.c b/tools/re2c/examples/basemmap.c
new file mode 100644
index 0000000..3e5b037
--- /dev/null
+++ b/tools/re2c/examples/basemmap.c
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
+
+volatile char ch;
+
+main(){ /* baseline for the scanner benchmarks: mmap stdin and touch every byte once */
+ struct stat statbuf;
+ unsigned char *buf; /* spelled out: this file never typedefs uchar */
+ if(fstat(0, &statbuf) != 0) return 1; /* without a valid st_size there is nothing sane to map */
+ buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
+ 0, 0);
+ if(buf != (unsigned char*)(-1)){
+ unsigned char *cur, *lim = &buf[statbuf.st_size];
+ for(cur = buf; cur != lim; ++cur){ /* test the moving pointer; buf never changes */
+ ch = *cur; /* ch is volatile, so every byte really is loaded */
+ }
+ munmap(buf, statbuf.st_size);
+ }
+}
diff --git a/tools/re2c/examples/c.re b/tools/re2c/examples/c.re
new file mode 100644
index 0000000..419964f
--- /dev/null
+++ b/tools/re2c/examples/c.re
@@ -0,0 +1,272 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){ /* refill the buffer from s->fd; returns the relocated cursor */
+ if(!s->eof){ /* eof is set once the sentinel is appended; nothing more to read after that */
+ uint cnt = s->tok - s->bot; /* bytes in front of the current token that can be dropped */
+ if(cnt){
+ memmove(s->bot, s->tok, s->lim - s->tok); /* source and destination may overlap, so memcpy is undefined here */
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){ /* not enough room for a full read: move to a bigger buffer */
+ uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok); /* tok == bot at this point, so this copies all live data */
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ cnt = read(s->fd, (char*) s->lim, BSIZE);
+ if(cnt > BSIZE) cnt = 0; /* read() returned -1, which wrapped in uint: treat the error as end of input */
+ if(cnt != BSIZE){ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; } /* short read: append sentinel newline, eof points just past it */
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+main(){ /* driver: pull tokens from stdin until scan() returns EOI */
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in)); /* fill() tests in.eof and does arithmetic on bot/tok/lim, so start all-zero */
+ in.fd = 0; /* read from file descriptor 0 */
+ while((t = scan(&in)) != EOI){ /* tokens are discarded; the commented-out printfs below dump them */
+/*
+ printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+ printf("%d\n", t);
+*/
+ }
+ close(in.fd);
+}
diff --git a/tools/re2c/examples/cmmap.re b/tools/re2c/examples/cmmap.re
new file mode 100644
index 0000000..bc4d498
--- /dev/null
+++ b/tools/re2c/examples/cmmap.re
@@ -0,0 +1,267 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int unint;
+typedef unsigned char uchar;
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ uchar *tok, *ptr, *cur, *pos, *lim, *eof;
+ unint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){ /* copies [tok, lim) to a heap buffer with a trailing '\n'; returns the rebased cursor */
+ if(!s->eof){ /* eof non-NULL means the copy was already made; then this is a no-op */
+ unint cnt = s->lim - s->tok; /* bytes from the current token start to the limit */
+ uchar *buf = malloc((cnt + 1)*sizeof(uchar)); /* +1 leaves room for the sentinel newline */
+ memcpy(buf, s->tok, cnt);
+ cursor = &buf[cursor - s->tok]; /* rebase each pointer by its offset from the old tok */
+ s->pos = &buf[s->pos - s->tok];
+ s->ptr = &buf[s->ptr - s->tok];
+ s->lim = &buf[cnt];
+ s->eof = s->lim; *(s->eof)++ = '\n'; /* buf[cnt] = '\n'; eof ends up one past it, lim stays on it */
+ s->tok = buf; /* assigned last: every rebase above still needs the old tok */
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+#ifndef MAP_NORESERVE
+#define MAP_NORESERVE 0
+#endif
+
+main(){ /* driver: mmap stdin and scan it until scan() returns EOI */
+ Scanner in;
+ struct stat statbuf;
+ uchar *buf;
+ if(fstat(0, &statbuf) != 0) return 1; /* without a valid st_size there is nothing sane to map */
+ buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
+ 0, 0);
+ if(buf != (uchar*)(-1)){
+ int t;
+ in.lim = &(in.cur = buf)[statbuf.st_size];
+ in.tok = in.ptr = in.pos = buf; /* fill() rebases ptr and pos against tok, so all three must point into the data */
+ in.eof = NULL; in.line = 0; /* eof NULL lets fill() run once; scan() increments line, so give it a defined start */
+ while((t = scan(&in)) != EOI){ /* tokens are discarded; the commented-out printfs below dump them */
+/*
+ printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+ printf("%d\n", t);
+*/
+ }
+ munmap(buf, statbuf.st_size);
+ }
+}
diff --git a/tools/re2c/examples/cnokw.re b/tools/re2c/examples/cnokw.re
new file mode 100644
index 0000000..bdc1279
--- /dev/null
+++ b/tools/re2c/examples/cnokw.re
@@ -0,0 +1,239 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){ /* refill the buffer from s->fd; returns the relocated cursor */
+ if(!s->eof){ /* eof is set once the sentinel is appended; nothing more to read after that */
+ uint cnt = s->tok - s->bot; /* bytes in front of the current token that can be dropped */
+ if(cnt){
+ memmove(s->bot, s->tok, s->lim - s->tok); /* source and destination may overlap, so memcpy is undefined here */
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){ /* not enough room for a full read: move to a bigger buffer */
+ uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok); /* tok == bot at this point, so this copies all live data */
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ cnt = read(s->fd, (char*) s->lim, BSIZE);
+ if(cnt > BSIZE) cnt = 0; /* read() returned -1, which wrapped in uint: treat the error as end of input */
+ if(cnt != BSIZE){ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; } /* short read: append sentinel newline, eof points just past it */
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+main(){ /* driver: pull tokens from stdin until scan() returns EOI */
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in)); /* fill() tests in.eof and does arithmetic on bot/tok/lim, so start all-zero */
+ in.fd = 0; /* read from file descriptor 0 */
+ while((t = scan(&in)) != EOI){ /* tokens are discarded; the commented-out printfs below dump them */
+/*
+ printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+ printf("%d\n", t);
+*/
+ }
+ close(in.fd);
+}
diff --git a/tools/re2c/examples/cunroll.re b/tools/re2c/examples/cunroll.re
new file mode 100644
index 0000000..dd9d805
--- /dev/null
+++ b/tools/re2c/examples/cunroll.re
@@ -0,0 +1,258 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){ /* refill the buffer from s->fd; returns the relocated cursor */
+ if(!s->eof){ /* eof is set once the sentinel is appended; nothing more to read after that */
+ uint cnt = s->tok - s->bot; /* bytes in front of the current token that can be dropped */
+ if(cnt){
+ memmove(s->bot, s->tok, s->lim - s->tok); /* source and destination may overlap, so memcpy is undefined here */
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){ /* not enough room for a full read: move to a bigger buffer */
+ uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok); /* tok == bot at this point, so this copies all live data */
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ cnt = read(s->fd, (char*) s->lim, BSIZE);
+ if(cnt > BSIZE) cnt = 0; /* read() returned -1, which wrapped in uint: treat the error as end of input */
+ if(cnt != BSIZE){ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; } /* short read: append sentinel newline, eof points just past it */
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+I = L|D;
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
+X = any\[*/];
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+
+ L { RET(ID); }
+ L I { RET(ID); }
+ L I I { RET(ID); }
+ L I I I { RET(ID); }
+ L I I I I { RET(ID); }
+ L I I I I I { RET(ID); }
+ L I I I I I I { RET(ID); }
+ L I I I I I I I { RET(ID); }
+ L I* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\[\n\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\[\n\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \t\v\f]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ X { goto comment; }
+ X X { goto comment; }
+ X X X { goto comment; }
+ X X X X { goto comment; }
+ X X X X X { goto comment; }
+ X X X X X X { goto comment; }
+ X X X X X X X { goto comment; }
+ X X X X X X X X { goto comment; }
+ any { goto comment; }
+*/
+}
+
+main(){ /* driver: pull tokens from stdin until scan() returns EOI */
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in)); /* fill() tests in.eof and does arithmetic on bot/tok/lim, so start all-zero */
+ in.fd = 0; /* read from file descriptor 0 */
+ while((t = scan(&in)) != EOI){ /* tokens are discarded; the commented-out printfs below dump them */
+/*
+ printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
+ printf("%d\n", t);
+*/
+ }
+ close(in.fd);
+}
diff --git a/tools/re2c/examples/modula.re b/tools/re2c/examples/modula.re
new file mode 100644
index 0000000..0468ba4
--- /dev/null
+++ b/tools/re2c/examples/modula.re
@@ -0,0 +1,202 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);} /* function-like: the generated code invokes YYFILL(n); n is unused because fill() always reads BSIZE */
+
+#define RETURN(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){ /* refill the buffer from s->fd; returns the relocated cursor */
+ if(!s->eof){ /* eof is set once the sentinel is appended; nothing more to read after that */
+ uint cnt = s->tok - s->bot; /* bytes in front of the current token that can be dropped */
+ if(cnt){
+ memmove(s->bot, s->tok, s->lim - s->tok); /* source and destination may overlap, so memcpy is undefined here */
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){ /* not enough room for a full read: move to a bigger buffer */
+ uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok); /* tok == bot at this point, so this copies all live data */
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ cnt = read(s->fd, (char*) s->lim, BSIZE);
+ if(cnt > BSIZE) cnt = 0; /* read() returned -1, which wrapped in uint: treat the error as end of input */
+ if(cnt != BSIZE){ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; } /* short read: append sentinel newline, eof points just past it */
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+ uint depth;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\000-\377];
+digit = [0-9];
+letter = [a-zA-Z];
+*/
+
+/*!re2c
+ "(*" { depth = 1; goto comment; }
+
+ digit + {RETURN(1);}
+ digit + / ".." {RETURN(1);}
+ [0-7] + "B" {RETURN(2);}
+ [0-7] + "C" {RETURN(3);}
+ digit [0-9A-F] * "H" {RETURN(4);}
+ digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);}
+ ['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);}
+
+ "#" {RETURN(7);}
+ "&" {RETURN(8);}
+ "(" {RETURN(9);}
+ ")" {RETURN(10);}
+ "*" {RETURN(11);}
+ "+" {RETURN(12);}
+ "," {RETURN(13);}
+ "-" {RETURN(14);}
+ "." {RETURN(15);}
+ ".." {RETURN(16);}
+ "/" {RETURN(17);}
+ ":" {RETURN(18);}
+ ":=" {RETURN(19);}
+ ";" {RETURN(20);}
+ "<" {RETURN(21);}
+ "<=" {RETURN(22);}
+ "<>" {RETURN(23);}
+ "=" {RETURN(24);}
+ ">" {RETURN(25);}
+ ">=" {RETURN(26);}
+ "[" {RETURN(27);}
+ "]" {RETURN(28);}
+ "^" {RETURN(29);}
+ "{" {RETURN(30);}
+ "|" {RETURN(31);}
+ "}" {RETURN(32);}
+ "~" {RETURN(33);}
+
+ "AND" {RETURN(34);}
+ "ARRAY" {RETURN(35);}
+ "BEGIN" {RETURN(36);}
+ "BY" {RETURN(37);}
+ "CASE" {RETURN(38);}
+ "CONST" {RETURN(39);}
+ "DEFINITION" {RETURN(40);}
+ "DIV" {RETURN(41);}
+ "DO" {RETURN(42);}
+ "ELSE" {RETURN(43);}
+ "ELSIF" {RETURN(44);}
+ "END" {RETURN(45);}
+ "EXIT" {RETURN(46);}
+ "EXPORT" {RETURN(47);}
+ "FOR" {RETURN(48);}
+ "FROM" {RETURN(49);}
+ "IF" {RETURN(50);}
+ "IMPLEMENTATION" {RETURN(51);}
+ "IMPORT" {RETURN(52);}
+ "IN" {RETURN(53);}
+ "LOOP" {RETURN(54);}
+ "MOD" {RETURN(55);}
+ "MODULE" {RETURN(56);}
+ "NOT" {RETURN(57);}
+ "OF" {RETURN(58);}
+ "OR" {RETURN(59);}
+ "POINTER" {RETURN(60);}
+ "PROCEDURE" {RETURN(61);}
+ "QUALIFIED" {RETURN(62);}
+ "RECORD" {RETURN(63);}
+ "REPEAT" {RETURN(64);}
+ "RETURN" {RETURN(65);}
+ "SET" {RETURN(66);}
+ "THEN" {RETURN(67);}
+ "TO" {RETURN(68);}
+ "TYPE" {RETURN(69);}
+ "UNTIL" {RETURN(70);}
+ "VAR" {RETURN(71);}
+ "WHILE" {RETURN(72);}
+ "WITH" {RETURN(73);}
+
+ letter (letter | digit) * {RETURN(74);}
+
+ [ \t]+ { goto std; }
+
+ "\n"
+ {
+ if(cursor == s->eof) RETURN(0);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\n", *s->tok);
+ goto std;
+ }
+*/
+comment:
+/*!re2c
+ "*)"
+ {
+ if(--depth == 0)
+ goto std;
+ else
+ goto comment;
+ }
+ "(*" { ++depth; goto comment; }
+ "\n"
+ {
+ if(cursor == s->eof) RETURN(0);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+void putStr(FILE *o, char *s, uint l){
+ while(l-- > 0)
+ putc(*s++, o);
+}
+*/
+
+main(){ /* driver: call scan() on stdin until it returns 0 */
+ Scanner in;
+ memset((char*) &in, 0, sizeof(in)); /* fill() tests in.eof and does arithmetic on bot/tok/lim, so start all-zero */
+ in.fd = 0; /* read from file descriptor 0 */
+ while(scan(&in)){ /* scan() returns 0 at the sentinel newline; every other rule returns 1..74 */
+/*
+ putc('<', stdout);
+ putStr(stdout, (char*) in.tok, in.cur - in.tok);
+ putc('>', stdout);
+ putc('\n', stdout);
+*/
+ }
+}
diff --git a/tools/re2c/examples/repeater.re b/tools/re2c/examples/repeater.re
new file mode 100644
index 0000000..f84b5c7
--- /dev/null
+++ b/tools/re2c/examples/repeater.re
@@ -0,0 +1,42 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define RET(n) printf("%d\n", n); return n
+
+int scan(char *s, int l){
+char *p = s;
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT (s+l)
+#define YYMARKER q
+#define YYFILL(n)
+/*!re2c
+ 'a'{1}"\n" {RET(1);}
+ 'a'{2,3}"\n" {RET(2);}
+ 'a'{4,}"\n" {RET(3);}
+ 'a'{6}"\n" {RET(4);}
+ [^aq]|"\n" {RET(0);}
+*/
+}
+
+#define do_scan(str) scan(str, strlen(str))
+
+main() /* feeds fixed strings to scan(); RET prints the number of whichever rule matched */
+{
+ do_scan("a\n");
+ do_scan("aa\n");
+ do_scan("aaa\n");
+ do_scan("aaaa\n");
+ do_scan("A\n"); /* upper- and mixed-case inputs follow */
+ do_scan("AA\n");
+ do_scan("aAa\n");
+ do_scan("AaaA\n");
+ do_scan("Q");
+ do_scan("AaaAa\n");
+ do_scan("AaaAaA\n");
+ do_scan("A"); /* no trailing newline */
+ do_scan("\n");
+ do_scan("0");
+}
diff --git a/tools/re2c/examples/rexx/README b/tools/re2c/examples/rexx/README
new file mode 100644
index 0000000..2af0178
--- /dev/null
+++ b/tools/re2c/examples/rexx/README
@@ -0,0 +1 @@
+Replacement modules for an existing REXX interpreter. Not standalone.
diff --git a/tools/re2c/examples/rexx/rexx.l b/tools/re2c/examples/rexx/rexx.l
new file mode 100644
index 0000000..b74741d
--- /dev/null
+++ b/tools/re2c/examples/rexx/rexx.l
@@ -0,0 +1,319 @@
+#include "scanio.h"
+#include "scanner.h"
+
+#define CURSOR ch
+#define LOADCURSOR ch = *cursor;
+#define ADVANCE cursor++;
+#define BACK(n) cursor -= (n);
+#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);}
+#define MARK(n) ScanCB.ptr = cursor; sel = (n);
+#define REVERT cursor = ScanCB.ptr;
+#define MARKER sel
+
+#define RETURN(i) {ScanCB.cur = cursor; return i;}
+
+int ScanToken(){
+ uchar *cursor = ScanCB.cur;
+ unsigned sel;
+ uchar ch;
+ ScanCB.tok = cursor;
+ ScanCB.eot = NULL;
+/*!re2c
+all = [\000-\377];
+eof = [\000];
+any = all\eof;
+letter = [a-z]|[A-Z];
+digit = [0-9];
+symchr = letter|digit|[.!?_];
+const = (digit|[.])symchr*([eE][+-]?digit+)?;
+simple = (symchr\(digit|[.]))(symchr\[.])*;
+stem = simple [.];
+symbol = symchr*;
+sqstr = ['] ((any\['\n])|(['][']))* ['];
+dqstr = ["] ((any\["\n])|(["]["]))* ["];
+str = sqstr|dqstr;
+ob = [ \t]*;
+not = [\\~];
+A = [aA];
+B = [bB];
+C = [cC];
+D = [dD];
+E = [eE];
+F = [fF];
+G = [gG];
+H = [hH];
+I = [iI];
+J = [jJ];
+K = [kK];
+L = [lL];
+M = [mM];
+N = [nN];
+O = [oO];
+P = [pP];
+Q = [qQ];
+R = [rR];
+S = [sS];
+T = [tT];
+U = [uU];
+V = [vV];
+W = [wW];
+X = [xX];
+Y = [yY];
+Z = [zZ];
+*/
+
+scan:
+/*!re2c
+"\n"
+ {
+ ++(ScanCB.lineNum);
+ ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+ RETURN(SU_EOL);
+ }
+"|" ob "|"
+ { RETURN(OP_CONCAT); }
+"+"
+ { RETURN(OP_PLUS); }
+"-"
+ { RETURN(OP_MINUS); }
+"*"
+ { RETURN(OP_MULT); }
+"/"
+ { RETURN(OP_DIV); }
+"%"
+ { RETURN(OP_IDIV); }
+"/" ob "/"
+ { RETURN(OP_REMAIN); }
+"*" ob "*"
+ { RETURN(OP_POWER); }
+"="
+ { RETURN(OP_EQUAL); }
+not ob "=" | "<" ob ">" | ">" ob "<"
+ { RETURN(OP_EQUAL_N); }
+">"
+ { RETURN(OP_GT); }
+"<"
+ { RETURN(OP_LT); }
+">" ob "=" | not ob "<"
+ { RETURN(OP_GE); }
+"<" ob "=" | not ob ">"
+ { RETURN(OP_LE); }
+"=" ob "="
+ { RETURN(OP_EQUAL_EQ); }
+not ob "=" ob "="
+ { RETURN(OP_EQUAL_EQ_N); }
+">" ob ">"
+ { RETURN(OP_GT_STRICT); }
+"<" ob "<"
+ { RETURN(OP_LT_STRICT); }
+">" ob ">" ob "=" | not ob "<" ob "<"
+ { RETURN(OP_GE_STRICT); }
+"<" ob "<" ob "=" | not ob ">" ob ">"
+ { RETURN(OP_LE_STRICT); }
+"&"
+ { RETURN(OP_AND); }
+"|"
+ { RETURN(OP_OR); }
+"&" ob "&"
+ { RETURN(OP_XOR); }
+not
+ { RETURN(OP_NOT); }
+
+":"
+ { RETURN(SU_COLON); }
+","
+ { RETURN(SU_COMMA); }
+"("
+ { RETURN(SU_POPEN); }
+")"
+ { RETURN(SU_PCLOSE); }
+";"
+ { RETURN(SU_EOC); }
+
+A D D R E S S
+ { RETURN(RX_ADDRESS); }
+A R G
+ { RETURN(RX_ARG); }
+C A L L
+ { RETURN(RX_CALL); }
+D O
+ { RETURN(RX_DO); }
+D R O P
+ { RETURN(RX_DROP); }
+E L S E
+ { RETURN(RX_ELSE); }
+E N D
+ { RETURN(RX_END); }
+E X I T
+ { RETURN(RX_EXIT); }
+I F
+ { RETURN(RX_IF); }
+I N T E R P R E T
+ { RETURN(RX_INTERPRET); }
+I T E R A T E
+ { RETURN(RX_ITERATE); }
+L E A V E
+ { RETURN(RX_LEAVE); }
+N O P
+ { RETURN(RX_NOP); }
+N U M E R I C
+ { RETURN(RX_NUMERIC); }
+O P T I O N S
+ { RETURN(RX_OPTIONS); }
+O T H E R W I S E
+ { RETURN(RX_OTHERWISE); }
+P A R S E
+ { RETURN(RX_PARSE); }
+P R O C E D U R E
+ { RETURN(RX_PROCEDURE); }
+P U L L
+ { RETURN(RX_PULL); }
+P U S H
+ { RETURN(RX_PUSH); }
+Q U E U E
+ { RETURN(RX_QUEUE); }
+R E T U R N
+ { RETURN(RX_RETURN); }
+S A Y
+ { RETURN(RX_SAY); }
+S E L E C T
+ { RETURN(RX_SELECT); }
+S I G N A L
+ { RETURN(RX_SIGNAL); }
+T H E N
+ { RETURN(RX_THEN); }
+T R A C E
+ { RETURN(RX_TRACE); }
+W H E N
+ { RETURN(RX_WHEN); }
+O F F
+ { RETURN(RXS_OFF); }
+O N
+ { RETURN(RXS_ON); }
+B Y
+ { RETURN(RXS_BY); }
+D I G I T S
+ { RETURN(RXS_DIGITS); }
+E N G I N E E R I N G
+ { RETURN(RXS_ENGINEERING); }
+E R R O R
+ { RETURN(RXS_ERROR); }
+E X P O S E
+ { RETURN(RXS_EXPOSE); }
+F A I L U R E
+ { RETURN(RXS_FAILURE); }
+F O R
+ { RETURN(RXS_FOR); }
+F O R E V E R
+ { RETURN(RXS_FOREVER); }
+F O R M
+ { RETURN(RXS_FORM); }
+F U Z Z
+ { RETURN(RXS_FUZZ); }
+H A L T
+ { RETURN(RXS_HALT); }
+L I N E I N
+ { RETURN(RXS_LINEIN); }
+N A M E
+ { RETURN(RXS_NAME); }
+N O T R E A D Y
+ { RETURN(RXS_NOTREADY); }
+N O V A L U E
+ { RETURN(RXS_NOVALUE); }
+S C I E N T I F I C
+ { RETURN(RXS_SCIENTIFIC); }
+S O U R C E
+ { RETURN(RXS_SOURCE); }
+S Y N T A X
+ { RETURN(RXS_SYNTAX); }
+T O
+ { RETURN(RXS_TO); }
+U N T I L
+ { RETURN(RXS_UNTIL); }
+U P P E R
+ { RETURN(RXS_UPPER); }
+V A L U E
+ { RETURN(RXS_VALUE); }
+V A R
+ { RETURN(RXS_VAR); }
+V E R S I O N
+ { RETURN(RXS_VERSION); }
+W H I L E
+ { RETURN(RXS_WHILE); }
+W I T H
+ { RETURN(RXS_WITH); }
+
+const
+ { RETURN(SU_CONST); }
+simple
+ { RETURN(SU_SYMBOL); }
+stem
+ { RETURN(SU_SYMBOL_STEM); }
+symbol
+ { RETURN(SU_SYMBOL_COMPOUND); }
+str
+ { RETURN(SU_LITERAL); }
+str [bB] / (all\symchr)
+ { RETURN(SU_LITERAL_BIN); }
+str [xX] / (all\symchr)
+ { RETURN(SU_LITERAL_HEX); }
+
+eof
+ { RETURN(SU_EOF); }
+any
+ { RETURN(SU_ERROR); }
+*/
+}
+
+bool StripToken(){
+ uchar *cursor = ScanCB.cur;
+ unsigned depth;
+ uchar ch;
+ bool blanks = FALSE;
+ ScanCB.eot = cursor;
+strip:
+/*!re2c
+"/*"
+ {
+ depth = 1;
+ goto comment;
+ }
+"\r"
+ { goto strip; }
+[ \t]
+ {
+ blanks = TRUE;
+ goto strip;
+ }
+[] / all
+ { RETURN(blanks); }
+*/
+
+comment:
+/*!re2c
+"*/"
+ {
+ if(--depth == 0)
+ goto strip;
+ else
+ goto comment;
+ }
+"\n"
+ {
+ ++(ScanCB.lineNum);
+ ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
+ goto comment;
+ }
+"/*"
+ {
+ ++depth;
+ goto comment;
+ }
+eof
+ { RETURN(blanks); }
+any
+ {
+ goto comment;
+ }
+*/
+}
diff --git a/tools/re2c/examples/rexx/scanio.c b/tools/re2c/examples/rexx/scanio.c
new file mode 100644
index 0000000..de6898d
--- /dev/null
+++ b/tools/re2c/examples/rexx/scanio.c
@@ -0,0 +1,41 @@
+/*
+ * Refill the scan buffer and return the (possibly relocated) cursor.
+ *
+ * Bytes in front of the current token are discarded by sliding the live
+ * data down to the bottom of the buffer; when an end-of-token mark (eot) is
+ * set, the bytes between eot and the cursor are dropped as well. The buffer
+ * is reallocated when fewer than 512 bytes are free after lim. For file
+ * input, up to 512 bytes are read at lim and any shortfall is zero-filled.
+ *
+ * The compaction copies use memmove() because source and destination lie in
+ * the same buffer and may overlap. The result of read() is kept in a signed
+ * variable so that a read error (-1) zero-fills the whole chunk instead of
+ * being used as a huge unsigned offset and length.
+ */
+uchar *ScanFill(uchar *cursor){
+    unsigned cnt = s->tok - s->bot;
+    s->pos += cursor - s->mrk;
+    if(cnt){
+        if(s->eot){
+            unsigned len = s->eot - s->tok;
+            memmove(s->bot, s->tok, len);
+            s->eot = &s->bot[len];
+            if((len = s->lim - cursor) != 0)
+                memmove(s->eot, cursor, len);
+            cursor = s->eot;
+            s->lim = &cursor[len];
+        } else {
+            memmove(s->bot, s->tok, s->lim - s->tok);
+            cursor -= cnt;
+            s->lim -= cnt;
+        }
+        s->tok = s->bot;
+        s->ptr -= cnt;
+    }
+    if((s->top - s->lim) < 512){
+        uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar));
+        memcpy(buf, s->bot, s->lim - s->bot);
+        s->tok = buf;
+        s->ptr = &buf[s->ptr - s->bot];
+        if(s->eot)
+            s->eot = &buf[s->eot - s->bot];
+        cursor = &buf[cursor - s->bot];
+        s->lim = &buf[s->lim - s->bot];
+        s->top = &s->lim[512];
+        free(s->bot);
+        s->bot = buf;
+    }
+    s->mrk = cursor;
+    if(ScanCBIO.file){
+        int got = read(ScanCBIO.u.f.fd, (char*) s->lim, 512);
+        if(got < 0)
+            got = 0;    /* read error: treat as "no data", pad with zeros */
+        if(got != 512)
+            memset(&s->lim[got], 0, 512 - got);
+        s->lim += 512;
+    }
+    return cursor;
+}
diff --git a/tools/re2c/examples/sample.re b/tools/re2c/examples/sample.re
new file mode 100644
index 0000000..2f497a3
--- /dev/null
+++ b/tools/re2c/examples/sample.re
@@ -0,0 +1,7 @@
+/*!re2c
+ "print" {return PRINT;}
+ [a-z]+ {return ID;}
+ [0-9]+ {return DEC;}
+ "0x" [0-9a-f]+ {return HEX;}
+ [\000-\377] {return ERR;}
+*/
diff --git a/tools/re2c/examples/simple.re b/tools/re2c/examples/simple.re
new file mode 100644
index 0000000..5fd8891
--- /dev/null
+++ b/tools/re2c/examples/simple.re
@@ -0,0 +1,13 @@
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+/*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\000-\377] {return NULL;}
+*/
+}
diff --git a/tools/re2c/globals.h b/tools/re2c/globals.h
new file mode 100644
index 0000000..344f3de
--- /dev/null
+++ b/tools/re2c/globals.h
@@ -0,0 +1,26 @@
+#ifndef re2c_globals_h
+#define re2c_globals_h
+
+#include "tools/re2c/basics.h"
+
+extern const char *fileName;
+extern char *outputFileName;
+extern int sFlag;
+extern int bFlag;
+extern int dFlag;
+extern int iFlag;
+extern int bUsedYYAccept;
+extern unsigned int oline;
+extern unsigned int maxFill;
+extern int vFillIndexes;
+extern unsigned char *vUsedLabels;
+extern unsigned int vUsedLabelAlloc;
+
+extern unsigned char asc2ebc[256];
+extern unsigned char ebc2asc[256];
+
+extern unsigned char *xlat, *talx;
+
+char *mystrdup(const char *str);
+
+#endif
diff --git a/tools/re2c/ins.h b/tools/re2c/ins.h
new file mode 100644
index 0000000..ba6c087
--- /dev/null
+++ b/tools/re2c/ins.h
@@ -0,0 +1,40 @@
+#ifndef re2c_ins_h
+#define re2c_ins_h
+
+#include "tools/re2c/basics.h"
+
+#define nChars 256
+typedef unsigned char Char;
+
+#define CHAR 0
+#define GOTO 1
+#define FORK 2
+#define TERM 3
+#define CTXT 4
+
+typedef union Ins {
+ struct {
+ byte tag;
+ byte marked;
+ void *link;
+ } i;
+ struct {
+ unsigned short value;
+ unsigned short bump;
+ void *link;
+ } c;
+} Ins;
+
+/* Return non-zero when the instruction's `marked` byte is set. */
+static int isMarked(Ins *i){
+ return i->i.marked != 0;
+}
+
+/* Set the instruction's `marked` byte to 1. */
+static void mark(Ins *i){
+ i->i.marked = 1;
+}
+
+/* Clear the instruction's `marked` byte. */
+static void unmark(Ins *i){
+ i->i.marked = 0;
+}
+
+#endif
diff --git a/tools/re2c/main.c b/tools/re2c/main.c
new file mode 100644
index 0000000..9484316
--- /dev/null
+++ b/tools/re2c/main.c
@@ -0,0 +1,196 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "globals.h"
+#include "parse.h"
+#include "dfa.h"
+#include "mbo_getopt.h"
+
+const char *fileName = 0;
+char *outputFileName = 0;
+int sFlag = 0;
+int bFlag = 0;
+int dFlag = 0;
+int iFlag = 0;
+int bUsedYYAccept = 0;
+unsigned int oline = 1;
+unsigned int maxFill = 1;
+int vFillIndexes = -1;
+unsigned char *vUsedLabels;
+unsigned int vUsedLabelAlloc = 1000;
+
+static char *opt_arg = NULL;
+static int opt_ind = 1;
+
+/*
+ * Command-line options understood by re2c: short option character, whether
+ * the option takes an argument, and the long option name. The entry whose
+ * character is '-' terminates the table. Every option handled in main()'s
+ * switch needs an entry here, otherwise mbo_getopt() reports it as unknown.
+ */
+static const mbo_opt_struct OPTIONS[] = {
+    {'?', 0, "help"},
+    {'b', 0, "bit-vectors"},
+    {'d', 0, "debug-output"},
+    {'e', 0, "ecb"},
+    {'f', 0, "storable-state"},
+    {'h', 0, "help"},
+    {'i', 0, "no-debug-info"},
+    {'o', 1, "output"},
+    {'s', 0, "nested-ifs"},
+    {'v', 0, "version"},
+    {'V', 0, "vernum"},
+    {'-', 0, NULL} /* end of args */
+};
+
+/* Print the command-line synopsis and a description of every option to stderr. */
+static void usage(void)
+{
+    fprintf(stderr,
+    "usage: re2c [-bdefhisvV] [-o output] file\n"
+    "\n"
+    "-? -h --help Display this info.\n"
+    "\n"
+    "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
+    " coax better code out of the compiler. Most useful for\n");
+    fprintf(stderr,
+    " specifications with more than a few keywords (e.g. for\n"
+    " most programming languages).\n"
+    "\n"
+    "-e --ecb Cross-compile from an ASCII platform to\n"
+    " an EBCDIC one.\n"
+    "\n");
+    fprintf(stderr,
+    "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
+    " need this assist to generate better code.\n"
+    "\n"
+    "-f --storable-state Generate a scanner with support for storable state\n"
+    "\n"
+    "-o --output=output Specify the output file instead of stdout\n"
+    "\n");
+    fprintf(stderr,
+    "-d --debug-output Creates a parser that dumps information about\n"
+    " the current position and in which state the\n"
+    " parser is.\n"
+    "\n"
+    "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n"
+    "\n"
+    "-v --version Show version information.\n"
+    "-V --vernum Show version as one number.\n");
+}
+
+/*
+ * Return a freshly malloc()ed copy of the NUL-terminated string str.
+ * The caller owns the result and releases it with free().
+ * Allocation failure is fatal: a message is printed to stderr and the
+ * program exits with status 1, so the return value is never NULL.
+ */
+char *
+mystrdup(const char *str)
+{
+    size_t len = strlen(str) + 1;   /* include the terminating NUL */
+    char *copy = malloc(len);
+
+    if (copy == NULL) {
+        fputs("Out of memory.\n", stderr);
+        exit(1);
+    }
+    memcpy(copy, str, len);
+    return copy;
+}
+
+/*
+ * Program entry point: parse the command line, open the input and output
+ * streams and run the parser/code generator over the input.
+ *
+ * Returns 0 on success, 1 on an I/O or allocation failure, and 2 when the
+ * command line is invalid or only help / the version number was requested.
+ */
+int main(int argc, char *argv[])
+{
+    int c;
+    int status = 0;
+    FILE *f, *output;
+
+    fileName = NULL;
+
+    if(argc == 1) {
+        usage();
+        return 2;
+    }
+
+    while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0))!=-1) {
+        switch (c) {
+        case 'b':
+            sFlag = 1;
+            bFlag = 1;
+            break;
+        case 'e':
+            xlat = asc2ebc;
+            talx = ebc2asc;
+            break;
+        case 's':
+            sFlag = 1;
+            break;
+        case 'd':
+            dFlag = 1;
+            break;
+        case 'f':
+            vFillIndexes = 0;
+            break;
+        case 'i':
+            iFlag = 1;
+            break;
+        case 'o':
+            outputFileName = opt_arg;
+            break;
+        case 'v':
+            fputs("re2c " PACKAGE_VERSION "\n", stdout);
+            break;
+        case 'V': {
+            /* Zero-initialised so that a version string with fewer than
+             * three numeric fields still prints defined values. */
+            int v1 = 0, v2 = 0, v3 = 0;
+            sscanf(PACKAGE_VERSION, "%d.%d.%d", &v1, &v2, &v3);
+            fprintf(stdout, "%02d%02d%02d\n", v1, v2, v3);
+            return 2;
+        }
+        case 'h':
+        case '?':
+        default:
+            usage();
+            return 2;
+        }
+    }
+
+    /* Exactly one non-option argument (the input file) must remain. */
+    if (argc == opt_ind + 1) {
+        fileName = argv[opt_ind];
+    } else {
+        usage();
+        return 2;
+    }
+
+    vUsedLabels = calloc(vUsedLabelAlloc, 1);
+    if (!vUsedLabels) {
+        fputs("Out of memory.\n", stderr);
+        return 1;
+    }
+
+    /* set up the input stream */
+    if(fileName[0] == '-' && fileName[1] == '\0'){
+        fileName = "<stdin>";
+        f = stdin;
+    } else {
+        if((f = fopen(fileName, "rt")) == NULL){
+            fprintf(stderr, "can't open %s\n", fileName);
+            return 1;
+        }
+    }
+
+    /* set up the output stream */
+    /* NOTE(review): fileName has already been replaced by "<stdin>" above
+     * when it was "-", so the second half of this test can never be true;
+     * confirm whether outputFileName was meant here. */
+    if (outputFileName == 0 || (fileName[0] == '-' && fileName[1] == '\0')) {
+        outputFileName = mystrdup("<stdout>");
+        output = stdout;
+    } else {
+        size_t len;
+        char *src, *dst, *tmp;
+
+        output = fopen(outputFileName, "wt");
+        if (!output) {
+            fprintf(stderr, "can't open %s\n", outputFileName);
+            return 1;
+        }
+
+        /* Keep a copy of the name with every backslash doubled; the worst
+         * case needs twice the length plus the terminator. */
+        len = strlen(outputFileName);
+        tmp = malloc((len+1)*2);
+        if (!tmp) {
+            fputs("Out of memory.\n", stderr);
+            return 1;
+        }
+
+        for (src = outputFileName, dst = tmp; *src; ++src)
+        {
+            if (*src == '\\')
+                *dst++ = *src;
+            *dst++ = *src;
+        }
+        *dst = '\0';
+
+        outputFileName = tmp;
+    }
+
+    parse(f, output);
+
+    /* Close explicitly so that a failed (buffered) write is reported
+     * instead of being lost at exit. */
+    if (f != stdin)
+        fclose(f);
+    if (output != stdout && fclose(output) != 0) {
+        fprintf(stderr, "can't write %s\n", outputFileName);
+        status = 1;
+    }
+
+    free(outputFileName);
+    return status;
+}
diff --git a/tools/re2c/mbo_getopt.c b/tools/re2c/mbo_getopt.c
new file mode 100644
index 0000000..f4553dc
--- /dev/null
+++ b/tools/re2c/mbo_getopt.c
@@ -0,0 +1,194 @@
+/*
+ Author: Marcus Boerger <helly@users.sourceforge.net>
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include "mbo_getopt.h"
+#define OPTERRCOLON (1)
+#define OPTERRNF (2)
+#define OPTERRARG (3)
+
+/*
+ * Report an option-parsing error for argv[oint] at character optchr.
+ * Nothing is printed unless show_err is non-zero; the return value is
+ * always '?'.
+ */
+static int mbo_opt_error(int argc, char * const *argv, int oint, int optchr, int err, int show_err)
+{
+    if (!show_err)
+        return '?';
+
+    fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1);
+
+    if (err == OPTERRCOLON)
+        fprintf(stderr, ": in flags\n");
+    else if (err == OPTERRNF)
+        fprintf(stderr, "option not found %c\n", argv[oint][optchr]);
+    else if (err == OPTERRARG)
+        fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]);
+    else
+        fprintf(stderr, "unknown\n");
+
+    return '?';
+}
+
+/*
+ * Fetch the next command-line option.
+ *
+ * argc/argv  the program's argument vector
+ * opts       option table, terminated by an entry whose opt_char is '-'
+ * optarg     receives a pointer to the option's argument when the matched
+ *            entry has need_param set
+ * optind     in/out index of the argv element being examined
+ * show_err   non-zero to have errors printed by mbo_opt_error()
+ *
+ * Returns the matched entry's opt_char, '?' (from mbo_opt_error) on an
+ * error, or EOF when *optind has reached argc, when the current argument
+ * does not start with '-', is exactly "-", or starts with "--" and matches
+ * no long name (in that last case *optind is advanced past it).
+ *
+ * NOTE(review): long names are compared with strcmp() against everything
+ * after "--", so an argument of the form --name=value does not match.
+ */
+int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err)
+{
+ /* Position inside the current argument; kept across calls so that several
+  * short options in one argument are returned one per call. */
+ static int optchr = 0;
+ static int dash = 0; /* have already seen the - */
+ /* Index in the argument where an attached option value would begin. */
+ int arg_start = 2;
+
+ /* Index of the matched table entry; stays -1 until a long name matches. */
+ int opts_idx = -1;
+
+ if (*optind >= argc)
+ {
+ return (EOF);
+ }
+
+ if (!dash)
+ {
+ if ((argv[*optind][0] != '-'))
+ {
+ return (EOF);
+ }
+ else
+ {
+ if (!argv[*optind][1])
+ {
+ /*
+ * use to specify stdin. Need to let pgm process this and
+ * the following args
+ */
+ return (EOF);
+ }
+ }
+ }
+
+ if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-'))
+ {
+ /* '--' indicates end of args if not followed by a known long option name */
+
+ while (1)
+ {
+ opts_idx++;
+
+ if (opts[opts_idx].opt_char == '-')
+ {
+ (*optind)++;
+ return (EOF);
+ }
+ else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name))
+ {
+ break;
+ }
+ }
+
+ optchr = 0;
+ dash = 1;
+ arg_start = 2 + strlen(opts[opts_idx].opt_name);
+ }
+
+ /* First look at a short-option argument: start just after the '-'. */
+ if (!dash)
+ {
+ dash = 1;
+ optchr = 1;
+ }
+
+ /* Check if the guy tries to do a -: kind of flag */
+ if (argv[*optind][optchr] == ':')
+ {
+ dash = 0;
+ (*optind)++;
+ return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err));
+ }
+
+ /* No long option matched: look the current character up as a short option. */
+ if (opts_idx < 0)
+ {
+ while (1)
+ {
+ opts_idx++;
+
+ if (opts[opts_idx].opt_char == '-')
+ {
+ /* Reached the table terminator: unknown option. Step past the
+  * offending character before reporting it. */
+ int errind = *optind;
+ int errchr = optchr;
+
+ if (!argv[*optind][optchr + 1])
+ {
+ dash = 0;
+ (*optind)++;
+ }
+ else
+ {
+ optchr++;
+ }
+
+ return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err));
+ }
+ else if (argv[*optind][optchr] == opts[opts_idx].opt_char)
+ {
+ break;
+ }
+ }
+ }
+
+ if (opts[opts_idx].need_param)
+ {
+ /* Check for cases where the value of the argument
+ is in the form -<arg> <val> or in the form -<arg><val> */
+ dash = 0;
+
+ if (!argv[*optind][arg_start])
+ {
+ /* Nothing attached: the value is the next argv element. */
+ (*optind)++;
+
+ if (*optind == argc)
+ {
+ return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err));
+ }
+
+ *optarg = argv[(*optind)++];
+ }
+ else
+ {
+ *optarg = &argv[*optind][arg_start];
+ (*optind)++;
+ }
+
+ return opts[opts_idx].opt_char;
+ }
+ else
+ {
+ /* arg_start is still 2 only for short options (a long match changes it). */
+ if (arg_start == 2)
+ {
+ if (!argv[*optind][optchr + 1])
+ {
+ dash = 0;
+ (*optind)++;
+ }
+ else
+ {
+ optchr++;
+ }
+ }
+ else
+ {
+ (*optind)++;
+ }
+
+ return opts[opts_idx].opt_char;
+ }
+
+ assert(0);
+ return (0); /* never reached */
+}
+
diff --git a/tools/re2c/mbo_getopt.h b/tools/re2c/mbo_getopt.h
new file mode 100644
index 0000000..8f962fd
--- /dev/null
+++ b/tools/re2c/mbo_getopt.h
@@ -0,0 +1,22 @@
+/*
+ Author: Marcus Boerger <helly@users.sourceforge.net>
+*/
+
+/* Define structure for one recognized option (both single char and long name).
+ * If opt_char is '-' this is the last option.
+ */
+
+#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
+#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
+
+typedef struct mbo_opt_struct
+{
+ const char opt_char;
+ const int need_param;
+ const char * opt_name;
+} mbo_opt_struct;
+
+int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err);
+
+#endif
+
diff --git a/tools/re2c/parse.h b/tools/re2c/parse.h
new file mode 100644
index 0000000..92077ca
--- /dev/null
+++ b/tools/re2c/parse.h
@@ -0,0 +1,29 @@
+#ifndef re2c_parse_h
+#define re2c_parse_h
+
+#include <stdio.h>
+#include "tools/re2c/scanner.h"
+#include "tools/re2c/re.h"
+
+typedef struct Symbol {
+ struct Symbol *next;
+ Str name;
+ RegExp *re;
+} Symbol;
+
+void Symbol_init(Symbol *, const SubStr*);
+static Symbol *Symbol_new(const SubStr*);
+Symbol *Symbol_find(const SubStr*);
+
+void line_source(FILE *, unsigned int);
+void parse(FILE *, FILE *);
+
+/* Allocate a Symbol on the heap and initialise it from str with Symbol_init(). */
+static Symbol *
+Symbol_new(const SubStr *str)
+{
+    Symbol *sym = malloc(sizeof *sym);
+
+    Symbol_init(sym, str);
+    return sym;
+}
+
+#endif
diff --git a/tools/re2c/parser.c b/tools/re2c/parser.c
new file mode 100644
index 0000000..02d5c66
--- /dev/null
+++ b/tools/re2c/parser.c
@@ -0,0 +1,249 @@
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/parser.h"
+
+int yylex(void);
+static RegExp *parse_expr(void);
+static RegExp *parse_diff(void);
+static RegExp *parse_term(void);
+static RegExp *parse_factor(void);
+static RegExp *parse_primary(void);
+
+static unsigned int accept;
+static RegExp *spec;
+static Scanner *in;
+
+static int curtok, peektok;
+yystype yylval;
+static yystype peekval;
+
+#define get_next_token() (curtok = yylex())
+
+/*
+ * Read one token of lookahead into peektok/peekval while leaving yylval as
+ * it was on entry. Requesting a second lookahead token is reported through
+ * Scanner_fatal().
+ */
+static void
+get_peek_token(void)
+{
+    yystype saved;
+
+    saved = yylval;             /* structure copy */
+    if (peektok != NONE)
+        Scanner_fatal(in, "more than one token of lookahead?");
+    peektok = yylex();
+    peekval = yylval;           /* structure copy */
+    yylval = saved;
+}
+
+/*
+ * Parse one specification: a sequence of name definitions (`ID = expr;`)
+ * and rules (`expr [/ expr] CODE`) until yylex() returns 0. Each rule is
+ * given the next accept number and is combined into `spec` with mkAlt().
+ * `accept` and `spec` are reset on entry.
+ */
+static void
+yyparse(void)
+{
+ RegExp *re, *look;
+
+ accept = 0;
+ spec = NULL;
+ get_next_token();
+ while (curtok != 0) {
+ switch (curtok) {
+ case ID:
+ /* One token of lookahead tells a definition apart from a rule that
+  * merely starts with an identifier. */
+ get_peek_token();
+ if (peektok == '=') {
+ /* ID = expr; */
+ Symbol *sym = yylval.symbol;
+ get_next_token(); /* id */
+ get_next_token(); /* = */
+ re = parse_expr();
+ if (curtok != ';')
+ Scanner_fatal(in, "missing `;' after regexp");
+ get_next_token(); /* ; */
+ if (sym->re)
+ Scanner_fatal(in, "sym already defined");
+ sym->re = re;
+ break;
+ }
+ /*@fallthrough@*/
+ default:
+ /* rule: expr [/ expr] CODE */
+ re = parse_expr();
+ if (!re)
+ Scanner_fatal(in, "expression syntax error");
+
+ /* Optional trailing context; a rule without one gets a NullOp. */
+ if (curtok == '/') {
+ get_next_token(); /* / */
+ look = parse_expr();
+ } else
+ look = RegExp_new_NullOp();
+
+ if (curtok != CODE)
+ Scanner_fatal(in, "missing code after regexp");
+ re = RegExp_new_RuleOp(re, look, yylval.token, accept++);
+ get_next_token(); /* CODE */
+ spec = spec ? mkAlt(spec, re) : re;
+ }
+ }
+}
+
+/* expr: diff ('|' diff)*  -- alternatives are folded left to right with mkAlt(). */
+static RegExp *
+parse_expr(void)
+{
+    RegExp *lhs = parse_diff();
+
+    for (;;) {
+        RegExp *rhs;
+
+        if (curtok != '|')
+            return lhs;
+        get_next_token();       /* | */
+        rhs = parse_diff();
+        lhs = mkAlt(lhs, rhs);
+    }
+}
+
+/*
+ * diff: term ('\' term)*  -- differences are folded left to right with
+ * mkDiff(); a NULL result from mkDiff() is reported through Scanner_fatal().
+ */
+static RegExp *
+parse_diff(void)
+{
+    RegExp *lhs = parse_term();
+
+    for (;;) {
+        RegExp *rhs;
+
+        if (curtok != '\\')
+            return lhs;
+        get_next_token();       /* \ */
+        rhs = parse_term();
+        lhs = mkDiff(lhs, rhs);
+        if (lhs == NULL)
+            Scanner_fatal(in, "can only difference char sets");
+    }
+}
+
+/* term: factor factor*  -- adjacent factors are concatenated left to right. */
+static RegExp *
+parse_term(void)
+{
+    RegExp *seq = parse_factor();
+    RegExp *next = parse_factor();
+
+    while (next != NULL) {
+        seq = RegExp_new_CatOp(seq, next);
+        next = parse_factor();
+    }
+    return seq;
+}
+
+/*
+ * factor: primary followed by any number of closure operators, either
+ * CLOSE tokens (whose operator character is '*', '+' or '?') or CLOSESIZE
+ * tokens carrying a min/max repeat count.
+ */
+static RegExp *
+parse_factor(void)
+{
+ RegExp *e;
+ char ch;
+ e = parse_primary();
+ while (curtok == CLOSE || curtok == CLOSESIZE) {
+ switch (curtok) {
+ case CLOSE:
+ ch = yylval.op;
+ /* A run of CLOSE tokens collapses to one operator; if they are not
+  * all the same character the run is treated as '*'. */
+ while (get_next_token() == CLOSE) {
+ if (ch != yylval.op)
+ ch = '*';
+ }
+ switch (ch) {
+ case '*':
+ /* zero or more: (one or more) | empty */
+ e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp());
+ break;
+ case '+':
+ e = RegExp_new_CloseOp(e);
+ break;
+ case '?':
+ /* optional: e | empty */
+ e = mkAlt(e, RegExp_new_NullOp());
+ break;
+ }
+ break;
+ case CLOSESIZE:
+ e = RegExp_new_CloseVOp(e, yylval.extop.minsize,
+ yylval.extop.maxsize);
+ get_next_token(); /* CLOSESIZE */
+ break;
+ default:
+ Scanner_fatal(in, "parse error");
+ break;
+ }
+ }
+ return e;
+}
+
+/*
+ * primary: ID | RANGE | STRING | '(' expr ')'
+ * Returns NULL, without consuming anything, when the current token cannot
+ * start a primary.
+ */
+static RegExp *
+parse_primary(void)
+{
+    RegExp *result;
+
+    if (curtok == ID) {
+        if (!yylval.symbol->re)
+            Scanner_fatal(in, "can't find symbol");
+        result = yylval.symbol->re;
+        get_next_token();
+        return result;
+    }
+
+    if (curtok == RANGE || curtok == STRING) {
+        result = yylval.regexp;
+        get_next_token();
+        return result;
+    }
+
+    if (curtok == '(') {
+        get_next_token();
+        result = parse_expr();
+        if (curtok != ')')
+            Scanner_fatal(in, "missing closing parenthesis");
+        get_next_token();
+        return result;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return the next token: the pending lookahead token (with its value
+ * restored into yylval) if there is one, otherwise a fresh token from the
+ * scanner.
+ */
+int
+yylex(void)
+{
+    int tok;
+
+    if (peektok == NONE)
+        return Scanner_scan(in);
+
+    tok = peektok;
+    yylval = peekval;
+    peektok = NONE;
+    return tok;
+}
+
+/*
+ * Write a `#line N "file"` directive for the input file to o and count the
+ * emitted line in oline. Does nothing when iFlag is set.
+ *
+ * Every backslash in the file name is doubled so that the name is a valid
+ * C string literal. The name is walked character by character: splitting
+ * it with strtok() dropped leading backslashes, collapsed runs of them, and
+ * returned NULL (which was then printed with %s) for an empty name or one
+ * made only of backslashes.
+ */
+void line_source(FILE *o, unsigned int line)
+{
+    const char *p = (fileName != NULL) ? fileName : "<stdin>";
+
+    if (iFlag)
+        return;
+    fprintf(o, "#line %u \"", line);
+    for (; *p != '\0'; ++p) {
+        if (*p == '\\')
+            fputc('\\', o);
+        fputc(*p, o);
+    }
+    fputs("\"\n", o); oline++;
+}
+
+/*
+ * Top-level driver: write the "Generated by" banner to o, create a Scanner
+ * over i, then repeat for as long as Scanner_echo() returns non-zero:
+ * parse one specification, generate code for it if it contains any rule,
+ * and emit a #line directive for the text that follows.
+ * (Presumably Scanner_echo() copies plain input through to o up to the next
+ * specification -- confirm in the scanner.)
+ */
+void parse(FILE *i, FILE *o){
+ time_t now;
+
+ time(&now);
+
+ peektok = NONE;
+
+ /* ctime()'s string ends in a newline, so the banner occupies two output
+  * lines; hence oline is advanced by 2. */
+ fputs("/* Generated by re2c 0.9.1-C on ", o);
+ fprintf(o, "%-24s", ctime(&now));
+ fputs(" */\n", o); oline+=2;
+
+ in = Scanner_new(i);
+
+ line_source(o, Scanner_line(in));
+
+ while(Scanner_echo(in, o)){
+ yyparse();
+ if(spec)
+ genCode(o, spec);
+ line_source(o, Scanner_line(in));
+ }
+}
diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h
new file mode 100644
index 0000000..c433a99
--- /dev/null
+++ b/tools/re2c/parser.h
@@ -0,0 +1,33 @@
+#ifndef RE2C_PARSER_H
+#define RE2C_PARSER_H
+
+/* Tokens */
+enum yytokentype {
+ CLOSESIZE = 258,
+ CLOSE = 259,
+ ID = 260,
+ CODE = 261,
+ RANGE = 262,
+ STRING = 263,
+ NONE = 264
+};
+
+#define CLOSESIZE 258
+#define CLOSE 259
+#define ID 260
+#define CODE 261
+#define RANGE 262
+#define STRING 263
+#define NONE 264
+
+typedef union {
+ Symbol *symbol;
+ RegExp *regexp;
+ Token *token;
+ char op;
+ ExtOp extop;
+} yystype;
+
+extern yystype yylval;
+
+#endif
diff --git a/tools/re2c/re.h b/tools/re2c/re.h
new file mode 100644
index 0000000..b45208b
--- /dev/null
+++ b/tools/re2c/re.h
@@ -0,0 +1,191 @@
+#ifndef re2c_re_h
+#define re2c_re_h
+
+#include <stdio.h>
+#include "tools/re2c/token.h"
+#include "tools/re2c/ins.h"
+
+typedef struct extop {
+ char op;
+ int minsize;
+ int maxsize;
+} ExtOp;
+
+typedef struct CharPtn {
+ unsigned int card;
+ struct CharPtn *fix;
+ struct CharPtn *nxt;
+} CharPtn;
+
+typedef struct CharSet {
+ CharPtn *fix;
+ CharPtn *freeHead, **freeTail;
+ CharPtn *rep[nChars];
+ CharPtn ptn[nChars];
+} CharSet;
+
+typedef struct Range {
+ struct Range *next;
+ unsigned int lb, ub; /* [lb,ub) */
+} Range;
+
+/* Initialise r as the unlinked half-open range [l,u). */
+static void
+Range_init(Range *r, unsigned int l, unsigned int u)
+{
+    r->lb = l;
+    r->ub = u;
+    r->next = NULL;
+}
+
+/* Allocate a new unlinked range [l,u). */
+static Range *
+Range_new(unsigned int l, unsigned int u)
+{
+    Range *fresh = malloc(sizeof *fresh);
+
+    Range_init(fresh, l, u);
+    return fresh;
+}
+
+/* Copy r's bounds into ro; ro is left unlinked (next is not copied). */
+static void
+Range_copy(Range *ro, const Range *r)
+{
+    ro->lb = r->lb;
+    ro->ub = r->ub;
+    ro->next = NULL;
+}
+
+/* Allocate a new unlinked range with the same bounds as r. */
+static Range *
+Range_new_copy(Range *r)
+{
+    Range *dup = malloc(sizeof *dup);
+
+    Range_copy(dup, r);
+    return dup;
+}
+
+void Range_out(FILE *, const Range *);
+
+typedef enum {
+ NULLOP = 1,
+ MATCHOP,
+ RULEOP,
+ ALTOP,
+ CATOP,
+ CLOSEOP,
+ CLOSEVOP
+} RegExpType;
+
+typedef struct RegExp {
+ RegExpType type;
+ unsigned int size;
+ union {
+ /* for MatchOp */
+ Range *match;
+ /* for RuleOp */
+ struct {
+ struct RegExp *exp;
+ struct RegExp *ctx;
+ Ins *ins;
+ unsigned int accept;
+ Token *code;
+ unsigned int line;
+ } RuleOp;
+ /* for AltOp and CatOp*/
+ struct {
+ struct RegExp *exp1, *exp2;
+ } AltCatOp;
+ /* for CloseOp */
+ struct RegExp *exp;
+ /* for CloseVOp*/
+ struct {
+ struct RegExp *exp;
+ int min;
+ int max;
+ } CloseVOp;
+ } d;
+} RegExp;
+
+/* Return r when its type tag equals t, otherwise NULL. */
+static RegExp *
+RegExp_isA(RegExp *r, RegExpType t)
+{
+    if (r->type != t)
+        return NULL;
+    return r;
+}
+
+void RegExp_split(RegExp*, CharSet*);
+void RegExp_calcSize(RegExp*, Char*);
+unsigned int RegExp_fixedLength(RegExp*);
+void RegExp_compile(RegExp*, Char*, Ins*);
+void RegExp_display(RegExp*, FILE *);
+
+/*
+ * Allocate a NULLOP node. Only the type tag is set; `size` and the union
+ * are left uninitialised.
+ */
+static RegExp *
+RegExp_new_NullOp(void)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = NULLOP;
+ return r;
+}
+
+/*
+ * Allocate a MATCHOP node that refers to the range list m (the pointer is
+ * stored, not copied). `size` is left uninitialised.
+ */
+static RegExp *
+RegExp_new_MatchOp(Range *m)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = MATCHOP;
+ r->d.match = m;
+ return r;
+}
+
+RegExp *RegExp_new_RuleOp(RegExp*, RegExp*, Token*, unsigned int);
+
+/*
+ * Allocate an ALTOP node with operands e1 and e2. The operand pointers are
+ * stored as given; `size` is left uninitialised.
+ */
+static RegExp *
+RegExp_new_AltOp(RegExp *e1, RegExp *e2)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = ALTOP;
+ r->d.AltCatOp.exp1 = e1;
+ r->d.AltCatOp.exp2 = e2;
+ return r;
+}
+
+/*
+ * Allocate a CATOP node with operands e1 and e2 (same layout as ALTOP).
+ * `size` is left uninitialised.
+ */
+static RegExp *
+RegExp_new_CatOp(RegExp *e1, RegExp *e2)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = CATOP;
+ r->d.AltCatOp.exp1 = e1;
+ r->d.AltCatOp.exp2 = e2;
+ return r;
+}
+
+/* Allocate a CLOSEOP node wrapping e. `size` is left uninitialised. */
+static RegExp *
+RegExp_new_CloseOp(RegExp *e)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = CLOSEOP;
+ r->d.exp = e;
+ return r;
+}
+
+/*
+ * Allocate a CLOSEVOP node wrapping e, with lb stored as its min and ub as
+ * its max. `size` is left uninitialised.
+ */
+static RegExp *
+RegExp_new_CloseVOp(RegExp *e, int lb, int ub)
+{
+ RegExp *r = malloc(sizeof(RegExp));
+ r->type = CLOSEVOP;
+ r->d.CloseVOp.exp = e;
+ r->d.CloseVOp.min = lb;
+ r->d.CloseVOp.max = ub;
+ return r;
+}
+
+extern void genCode(FILE *, RegExp*);
+extern RegExp *mkDiff(RegExp*, RegExp*);
+extern RegExp *mkDot(void);
+extern RegExp *strToRE(SubStr);
+extern RegExp *strToCaseInsensitiveRE(SubStr);
+extern RegExp *ranToRE(SubStr);
+extern RegExp *invToRE(SubStr);
+
+extern RegExp *mkAlt(RegExp*, RegExp*);
+
+#endif
diff --git a/tools/re2c/re2c.1 b/tools/re2c/re2c.1
new file mode 100644
index 0000000..d69f94d
--- /dev/null
+++ b/tools/re2c/re2c.1
@@ -0,0 +1,536 @@
+.ds re \fBre2c\fP
+.ds le \fBlex\fP
+.ds rx regular expression
+.ds lx \fIl\fP-expression
+.TH RE2C 1 "8 April 1994" "Version 0.5"
+\"$Log: re2c.1,v $
+\"Revision 1.1 2002/04/07 22:27:06 peter
+\"Initial revision
+\"
+\"Revision 1.2 1994/04/16 15:50:32 peterr
+\"Fix bug in simple example.
+\"
+\"Revision 1.1 1994/04/08 15:39:09 peterr
+\"Initial revision
+\"
+.SH NAME
+re2c \- convert regular expressions to C/C++
+
+.SH SYNOPSIS
+\*(re [\fB-esb\fP] \fIname\fP
+
+.SH DESCRIPTION
+\*(re is a preprocessor that generates C-based recognizers from regular
+expressions.
+The input to \*(re consists of C/C++ source interleaved with
+comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
+scanner specifications.
+In the output these comments are replaced with code that, when
+executed, will find the next input token and then execute
+some user-supplied token-specific code.
+
+For example, given the following code
+
+.in +3
+.nf
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+/*!re2c
+ [0-9]+ {return YYCURSOR;}
+ [\\000-\\377] {return NULL;}
+*/
+}
+.fi
+.in -3
+
+\*(re will generate
+
+.in +3
+.nf
+/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
+#line 1 "simple.re"
+#define NULL ((char*) 0)
+char *scan(char *p){
+char *q;
+#define YYCTYPE char
+#define YYCURSOR p
+#define YYLIMIT p
+#define YYMARKER q
+#define YYFILL(n)
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+yy1: ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/') goto yy4;
+ if(yych >= ':') goto yy4;
+yy2: yych = *++YYCURSOR;
+ goto yy7;
+yy3:
+#line 10
+ {return YYCURSOR;}
+yy4: yych = *++YYCURSOR;
+yy5:
+#line 11
+ {return NULL;}
+yy6: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy7: if(yych <= '/') goto yy3;
+ if(yych <= '9') goto yy6;
+ goto yy3;
+}
+#line 12
+
+}
+.fi
+.in -3
+
+.SH OPTIONS
+\*(re provides the following options:
+.TP
+\fB-e\fP
+Cross-compile from an ASCII platform to an EBCDIC one.
+.TP
+\fB-s\fP
+Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
+assist to generate better code.
+.TP
+\fB-b\fP
+Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
+code out of the compiler. Most useful for specifications with more than a
+few keywords (e.g. for most programming languages).
+
+.SH "INTERFACE CODE"
+Unlike other scanner generators, \*(re does not generate complete scanners:
+the user must supply some interface code.
+In particular, the user must define the following macros:
+.TP
+\fCYYCTYPE\fP
+Type used to hold an input symbol.
+Usually \fCchar\fP or \fCunsigned char\fP.
+.TP
+\fCYYCURSOR\fP
+\*(lx of type \fCYYCTYPE *\fP that points to the current input symbol.
+The generated code advances \fCYYCURSOR\fP as symbols are matched.
+On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
+current token. On exit, \fCYYCURSOR\fP will point to the first character of
+the following token.
+.TP
+\fCYYLIMIT\fP
+Expression of type \fCYYCTYPE *\fP that marks the end of the buffer
+(\fCYYLIMIT[-1]\fP is the last character in the buffer).
+The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
+to determine when the buffer needs (re)filling.
+.TP
+\fCYYMARKER\fP
+\*(lx of type \fCYYCTYPE *\fP.
+The generated code saves backtracking information in \fCYYMARKER\fP.
+.TP
+\fCYYFILL(\fP\fIn\fP\fC)\fP
+The generated code "calls" \fCYYFILL\fP when the buffer needs
+(re)filling: at least \fIn\fP additional characters should
+be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and
+\fCYYMARKER\fP as needed. Note that for typical programming languages
+\fIn\fP will be the length of the longest keyword plus one.
+
+.SH "SCANNER SPECIFICATIONS"
+Each scanner specification consists of a set of \fIrules\fP and name
+definitions.
+Rules consist of a regular expression along with a block of C/C++ code that
+is to be executed when the associated regular expression is matched.
+Name definitions are of the form
+``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''.
+
+.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
+.TP
+\fC"foo"\fP
+the literal string \fCfoo\fP.
+ANSI-C escape sequences can be used.
+.TP
+\fC[xyz]\fP
+a "character class"; in this case,
+the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
+.TP
+\fC[abj-oZ]\fP
+a "character class" with a range in it;
+matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
+or a '\fCZ\fP'.
+.TP
+\fIr\fP\fC\e\fP\fIs\fP
+match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
+which can be expressed as character classes.
+.TP
+\fIr\fP\fC*\fP
+zero or more \fIr\fP's, where \fIr\fP is any regular expression
+.TP
+\fC\fIr\fP\fC+\fP
+one or more \fIr\fP's
+.TP
+\fC\fIr\fP\fC?\fP
+zero or one \fIr\fP's (that is, "an optional \fIr\fP")
+.TP
+name
+the expansion of the "name" definition (see above)
+.TP
+\fC(\fP\fIr\fP\fC)\fP
+an \fIr\fP; parentheses are used to override precedence
+(see below)
+.TP
+\fIrs\fP
+an \fIr\fP followed by an \fIs\fP ("concatenation")
+.TP
+\fIr\fP\fC|\fP\fIs\fP
+either an \fIr\fP or an \fIs\fP
+.TP
+\fIr\fP\fC/\fP\fIs\fP
+an \fIr\fP but only if it is followed by an \fIs\fP. The s is not part of
+the matched text. This type of \*(rx is called "trailing context".
+.LP
+The regular expressions listed above are grouped according to
+precedence, from highest precedence at the top to lowest at the bottom.
+Those grouped together have equal precedence.
+
+.SH "A LARGER EXAMPLE"
+.LP
+.in +3
+.nf
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define ADDEQ 257
+#define ANDAND 258
+#define ANDEQ 259
+#define ARRAY 260
+#define ASM 261
+#define AUTO 262
+#define BREAK 263
+#define CASE 264
+#define CHAR 265
+#define CONST 266
+#define CONTINUE 267
+#define DECR 268
+#define DEFAULT 269
+#define DEREF 270
+#define DIVEQ 271
+#define DO 272
+#define DOUBLE 273
+#define ELLIPSIS 274
+#define ELSE 275
+#define ENUM 276
+#define EQL 277
+#define EXTERN 278
+#define FCON 279
+#define FLOAT 280
+#define FOR 281
+#define FUNCTION 282
+#define GEQ 283
+#define GOTO 284
+#define ICON 285
+#define ID 286
+#define IF 287
+#define INCR 288
+#define INT 289
+#define LEQ 290
+#define LONG 291
+#define LSHIFT 292
+#define LSHIFTEQ 293
+#define MODEQ 294
+#define MULEQ 295
+#define NEQ 296
+#define OREQ 297
+#define OROR 298
+#define POINTER 299
+#define REGISTER 300
+#define RETURN 301
+#define RSHIFT 302
+#define RSHIFTEQ 303
+#define SCON 304
+#define SHORT 305
+#define SIGNED 306
+#define SIZEOF 307
+#define STATIC 308
+#define STRUCT 309
+#define SUBEQ 310
+#define SWITCH 311
+#define TYPEDEF 312
+#define UNION 313
+#define UNSIGNED 314
+#define VOID 315
+#define VOLATILE 316
+#define WHILE 317
+#define XOREQ 318
+#define EOI 319
+
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+#define BSIZE 8192
+
+#define YYCTYPE uchar
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RET(i) {s->cur = cursor; return i;}
+
+typedef struct Scanner {
+ int fd;
+ uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
+ uint line;
+} Scanner;
+
+uchar *fill(Scanner *s, uchar *cursor){
+ if(!s->eof){
+ uint cnt = s->tok - s->bot;
+ if(cnt){
+ memcpy(s->bot, s->tok, s->lim - s->tok);
+ s->tok = s->bot;
+ s->ptr -= cnt;
+ cursor -= cnt;
+ s->pos -= cnt;
+ s->lim -= cnt;
+ }
+ if((s->top - s->lim) < BSIZE){
+ uchar *buf = (uchar*)
+ malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
+ memcpy(buf, s->tok, s->lim - s->tok);
+ s->tok = buf;
+ s->ptr = &buf[s->ptr - s->bot];
+ cursor = &buf[cursor - s->bot];
+ s->pos = &buf[s->pos - s->bot];
+ s->lim = &buf[s->lim - s->bot];
+ s->top = &s->lim[BSIZE];
+ free(s->bot);
+ s->bot = buf;
+ }
+ if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
+ s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
+ }
+ s->lim += cnt;
+ }
+ return cursor;
+}
+
+int scan(Scanner *s){
+ uchar *cursor = s->cur;
+std:
+ s->tok = cursor;
+/*!re2c
+any = [\\000-\\377];
+O = [0-7];
+D = [0-9];
+L = [a-zA-Z_];
+H = [a-fA-F0-9];
+E = [Ee] [+-]? D+;
+FS = [fFlL];
+IS = [uUlL]*;
+ESC = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
+*/
+
+/*!re2c
+ "/*" { goto comment; }
+
+ "auto" { RET(AUTO); }
+ "break" { RET(BREAK); }
+ "case" { RET(CASE); }
+ "char" { RET(CHAR); }
+ "const" { RET(CONST); }
+ "continue" { RET(CONTINUE); }
+ "default" { RET(DEFAULT); }
+ "do" { RET(DO); }
+ "double" { RET(DOUBLE); }
+ "else" { RET(ELSE); }
+ "enum" { RET(ENUM); }
+ "extern" { RET(EXTERN); }
+ "float" { RET(FLOAT); }
+ "for" { RET(FOR); }
+ "goto" { RET(GOTO); }
+ "if" { RET(IF); }
+ "int" { RET(INT); }
+ "long" { RET(LONG); }
+ "register" { RET(REGISTER); }
+ "return" { RET(RETURN); }
+ "short" { RET(SHORT); }
+ "signed" { RET(SIGNED); }
+ "sizeof" { RET(SIZEOF); }
+ "static" { RET(STATIC); }
+ "struct" { RET(STRUCT); }
+ "switch" { RET(SWITCH); }
+ "typedef" { RET(TYPEDEF); }
+ "union" { RET(UNION); }
+ "unsigned" { RET(UNSIGNED); }
+ "void" { RET(VOID); }
+ "volatile" { RET(VOLATILE); }
+ "while" { RET(WHILE); }
+
+ L (L|D)* { RET(ID); }
+
+ ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
+ (['] (ESC|any\\[\\n\\\\'])* ['])
+ { RET(ICON); }
+
+ (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
+ { RET(FCON); }
+
+ (["] (ESC|any\\[\\n\\\\"])* ["])
+ { RET(SCON); }
+
+ "..." { RET(ELLIPSIS); }
+ ">>=" { RET(RSHIFTEQ); }
+ "<<=" { RET(LSHIFTEQ); }
+ "+=" { RET(ADDEQ); }
+ "-=" { RET(SUBEQ); }
+ "*=" { RET(MULEQ); }
+ "/=" { RET(DIVEQ); }
+ "%=" { RET(MODEQ); }
+ "&=" { RET(ANDEQ); }
+ "^=" { RET(XOREQ); }
+ "|=" { RET(OREQ); }
+ ">>" { RET(RSHIFT); }
+ "<<" { RET(LSHIFT); }
+ "++" { RET(INCR); }
+ "--" { RET(DECR); }
+ "->" { RET(DEREF); }
+ "&&" { RET(ANDAND); }
+ "||" { RET(OROR); }
+ "<=" { RET(LEQ); }
+ ">=" { RET(GEQ); }
+ "==" { RET(EQL); }
+ "!=" { RET(NEQ); }
+ ";" { RET(';'); }
+ "{" { RET('{'); }
+ "}" { RET('}'); }
+ "," { RET(','); }
+ ":" { RET(':'); }
+ "=" { RET('='); }
+ "(" { RET('('); }
+ ")" { RET(')'); }
+ "[" { RET('['); }
+ "]" { RET(']'); }
+ "." { RET('.'); }
+ "&" { RET('&'); }
+ "!" { RET('!'); }
+ "~" { RET('~'); }
+ "-" { RET('-'); }
+ "+" { RET('+'); }
+ "*" { RET('*'); }
+ "/" { RET('/'); }
+ "%" { RET('%'); }
+ "<" { RET('<'); }
+ ">" { RET('>'); }
+ "^" { RET('^'); }
+ "|" { RET('|'); }
+ "?" { RET('?'); }
+
+
+ [ \\t\\v\\f]+ { goto std; }
+
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->pos = cursor; s->line++;
+ goto std;
+ }
+
+ any
+ {
+ printf("unexpected character: %c\\n", *s->tok);
+ goto std;
+ }
+*/
+
+comment:
+/*!re2c
+ "*/" { goto std; }
+ "\\n"
+ {
+ if(cursor == s->eof) RET(EOI);
+ s->tok = s->pos = cursor; s->line++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+main(){
+ Scanner in;
+ int t;
+ memset((char*) &in, 0, sizeof(in));
+ in.fd = 0;
+ while((t = scan(&in)) != EOI){
+/*
+ printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
+ printf("%d\\n", t);
+*/
+ }
+ close(in.fd);
+}
+.fi
+.in -3
+
+.SH "SEE ALSO"
+.LP
+flex(1), lex(1).
+
+.SH FEATURES
+.LP
+\*(re does not provide a default action:
+the generated code assumes that the input
+will consist of a sequence of tokens.
+Typically this can be dealt with by adding a rule such as the one for
+unexpected characters in the example above.
+.LP
+The user must arrange for a sentinel token to appear at the end of input
+(and provide a rule for matching it):
+\*(re does not provide an \fC<<EOF>>\fP expression.
+If the source is from a null-byte terminated string, a
+rule matching a null character will suffice. If the source is from a
+file then the approach taken in the example can be used: pad the input with
+a newline (or some other character that can't appear within another token);
+upon recognizing such a character check to see if it is the sentinel
+and act accordingly.
+.LP
+\*(re does not provide start conditions: use a separate scanner
+specification for each start condition (as illustrated in the above example).
+.LP
+No [^x]. Use difference instead.
+.SH BUGS
+.LP
+Only fixed length trailing context can be handled.
+.LP
+The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not
+provided to the generated code (this value is needed for constructing
+the interface code).
+Note that this value is usually relatively small: for
+typical programming languages \fIn\fP will be the length of the longest
+keyword plus one.
+.LP
+Difference only works for character sets.
+.LP
+The \*(re internal algorithms need documentation.
+
+.SH AUTHOR
+.LP
+Please send bug reports, fixes and feedback to:
+.LP
+.nf
+Peter Bumbulis
+Computer Systems Group
+University of Waterloo
+Waterloo, Ontario
+N2L 3G1
+Internet: peterr@csg.uwaterloo.ca
+.fi
diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c
new file mode 100644
index 0000000..034c935
--- /dev/null
+++ b/tools/re2c/scanner.c
@@ -0,0 +1,748 @@
+/* Generated by re2c 0.9.1-C on Sun Oct 9 22:15:58 2005
+ */
+#line 1 "scanner.re"
+#include <stdlib.h>
+#include <string.h>
+#include "tools/re2c/scanner.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parser.h"
+
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define BSIZE 8192
+
+#define YYCTYPE unsigned char
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RETURN(i) {s->cur = cursor; return i;}
+
+static unsigned char *fill(Scanner*, unsigned char*);
+
+/*
+ * Put scanner s into its initial state: input comes from stream i, no
+ * buffer exists yet (every buffer pointer is NULL), the token column and
+ * token line are zero, and the current line counter starts at 1.
+ */
+void
+Scanner_init(Scanner *s, FILE *i)
+{
+    /* Line/column bookkeeping. */
+    s->cline = 1;
+    s->tline = 0;
+    s->tchar = 0;
+
+    /* No buffer has been allocated and end of input has not been seen. */
+    s->eof = NULL;
+    s->top = NULL;
+    s->lim = NULL;
+    s->pos = NULL;
+    s->cur = NULL;
+    s->ptr = NULL;
+    s->tok = NULL;
+    s->bot = NULL;
+
+    s->in = i;
+}
+
+/*
+ * Refill the scanner's buffer and return the cursor, adjusted for any
+ * movement of the buffer contents.
+ *
+ * Does nothing once s->eof has been set.  Otherwise:
+ *   1. the bytes in front of the current token (s->bot up to s->tok) are
+ *      dropped by sliding the live region (s->tok up to s->lim) down to
+ *      s->bot, and every buffer pointer is shifted by the same amount;
+ *   2. if fewer than BSIZE bytes of room remain after s->lim, a larger
+ *      buffer is allocated, the live region is copied into it and every
+ *      buffer pointer is rebased onto it;
+ *   3. up to BSIZE bytes are read from s->in.  A short read is taken as end
+ *      of input: a '\0' sentinel is stored after the data and s->eof is
+ *      left pointing one past that sentinel.
+ *
+ * The cursor is passed in and returned because callers keep it in a local
+ * variable rather than in the Scanner.  Allocation failure is reported
+ * through Scanner_fatal().
+ */
+static unsigned char *
+fill(Scanner *s, unsigned char *cursor)
+{
+    if (!s->eof) {
+        size_t cnt = s->tok - s->bot;
+
+        if (cnt) {
+            /*
+             * Source and destination overlap whenever the live region is
+             * longer than the discarded prefix, so memmove is required;
+             * memcpy on overlapping ranges is undefined behaviour.
+             */
+            memmove(s->bot, s->tok, s->lim - s->tok);
+            s->tok = s->bot;
+            s->ptr -= cnt;
+            cursor -= cnt;
+            s->pos -= cnt;
+            s->lim -= cnt;
+        }
+        if ((s->top - s->lim) < BSIZE) {
+            /* Room for the live data, one full read and the sentinel. */
+            unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE) + 1);
+
+            if (buf == NULL)
+                Scanner_fatal(s, "out of memory");
+            /* On the very first call there is no old data to copy. */
+            if (s->lim != s->tok)
+                memcpy(buf, s->tok, s->lim - s->tok);
+            s->tok = buf;
+            s->ptr = &buf[s->ptr - s->bot];
+            cursor = &buf[cursor - s->bot];
+            s->pos = &buf[s->pos - s->bot];
+            s->lim = &buf[s->lim - s->bot];
+            s->top = &s->lim[BSIZE];
+            free(s->bot);
+            s->bot = buf;
+        }
+        cnt = fread(s->lim, 1, BSIZE, s->in);
+        if (cnt != BSIZE) {
+            /* Short read: terminate the data and remember where it ends. */
+            s->eof = &s->lim[cnt];
+            *s->eof++ = '\0';
+        }
+        s->lim += cnt;
+    }
+    return cursor;
+}
+
+#line 79 "scanner.re"
+
+
+/*
+ * Scanner_echo (generated from the echo rules in scanner.re).
+ *
+ * Copies input text from the scanner to `out` until the opener of a re2c
+ * block is recognised.
+ *
+ * Returns 0 immediately if end of input was already reached, and 0 again
+ * when the '\0' sentinel at s->eof is consumed.  Returns 1 after the
+ * "!re2c" comment opener has been consumed; the text in front of the
+ * opener (cursor - 7) is written out first.
+ *
+ * Other actions:
+ *   - the "!max:re2c" comment opener prints "#define YYMAXFILL <maxFill>"
+ *     and sets ignore_eoc so that the following comment terminator is
+ *     dropped instead of echoed;
+ *   - a comment terminator is echoed unless ignore_eoc is set;
+ *   - a newline flushes the pending text and increments s->cline and oline;
+ *   - any other character is accumulated and flushed later.
+ *
+ * NOTE(review): in the '\0' action, when cursor != s->eof there is no
+ * goto/return, so control falls through into the following state label.
+ */
+int
+Scanner_echo(Scanner *s, FILE *out)
+{
+ unsigned char *cursor = s->cur;
+ int ignore_eoc = 0;
+
+ /* Catch EOF */
+ if (s->eof && cursor == s->eof)
+ return 0;
+
+ s->tok = cursor;
+echo:
+
+#line 87 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy0;
+ ++YYCURSOR;
+yy0:
+ if((YYLIMIT - YYCURSOR) < 11) YYFILL(11);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych <= '\000') goto yy7;
+ if(yych == '\n') goto yy5;
+ goto yy9;
+ } else {
+ if(yych <= '*') goto yy4;
+ if(yych != '/') goto yy9;
+ goto yy2;
+ }
+yy2: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '*') goto yy12;
+ goto yy3;
+yy3:
+#line 117 "scanner.re"
+{ goto echo; }
+#line 112 "scanner.c"
+yy4: yych = *++YYCURSOR;
+ if(yych == '/') goto yy10;
+ goto yy3;
+yy5: yych = *++YYCURSOR;
+ goto yy6;
+yy6:
+#line 112 "scanner.re"
+{ fwrite(s->tok, 1, cursor - s->tok, out);
+ s->tok = s->pos = cursor; s->cline++; oline++;
+ goto echo; }
+#line 123 "scanner.c"
+yy7: yych = *++YYCURSOR;
+ goto yy8;
+yy8:
+#line 115 "scanner.re"
+{ fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */
+ if(cursor == s->eof) { RETURN(0); } }
+#line 130 "scanner.c"
+yy9: yych = *++YYCURSOR;
+ goto yy3;
+yy10: yych = *++YYCURSOR;
+ goto yy11;
+yy11:
+#line 103 "scanner.re"
+{
+ if (ignore_eoc) {
+ ignore_eoc = 0;
+ } else {
+ fwrite(s->tok, 1, cursor - s->tok, out);
+ }
+ s->tok = s->pos = cursor;
+ goto echo;
+ }
+#line 146 "scanner.c"
+yy12: yych = *++YYCURSOR;
+ if(yych == '!') goto yy14;
+ goto yy13;
+yy13: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy3;
+ }
+yy14: yych = *++YYCURSOR;
+ if(yych == 'm') goto yy15;
+ if(yych == 'r') goto yy16;
+ goto yy13;
+yy15: yych = *++YYCURSOR;
+ if(yych == 'a') goto yy21;
+ goto yy13;
+yy16: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy13;
+ goto yy17;
+yy17: yych = *++YYCURSOR;
+ if(yych != '2') goto yy13;
+ goto yy18;
+yy18: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy13;
+ goto yy19;
+yy19: yych = *++YYCURSOR;
+ goto yy20;
+yy20:
+#line 94 "scanner.re"
+{ fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
+ s->tok = cursor;
+ RETURN(1); }
+#line 177 "scanner.c"
+yy21: yych = *++YYCURSOR;
+ if(yych != 'x') goto yy13;
+ goto yy22;
+yy22: yych = *++YYCURSOR;
+ if(yych != ':') goto yy13;
+ goto yy23;
+yy23: yych = *++YYCURSOR;
+ if(yych != 'r') goto yy13;
+ goto yy24;
+yy24: yych = *++YYCURSOR;
+ if(yych != 'e') goto yy13;
+ goto yy25;
+yy25: yych = *++YYCURSOR;
+ if(yych != '2') goto yy13;
+ goto yy26;
+yy26: yych = *++YYCURSOR;
+ if(yych != 'c') goto yy13;
+ goto yy27;
+yy27: yych = *++YYCURSOR;
+ goto yy28;
+yy28:
+#line 97 "scanner.re"
+{
+ fprintf(out, "#define YYMAXFILL %u\n", maxFill);
+ s->tok = s->pos = cursor;
+ ignore_eoc = 1;
+ goto echo;
+ }
+#line 206 "scanner.c"
+}
+#line 118 "scanner.re"
+
+}
+
+
+/*
+ * Scanner_scan (generated from the scan/code/comment rules in scanner.re).
+ *
+ * Returns the next token of a re2c specification, or 0 when the end of the
+ * re2c block (a comment terminator seen in scan mode) or the end of input
+ * is reached.  Token values produced here:
+ *   - the character itself for ( ) | = ; / and backslash;
+ *   - CLOSE for * + ?, with the operator stored in yylval.op;
+ *   - CLOSESIZE for {n}, {n,m} and {n,}, with the bounds in yylval.extop
+ *     (maxsize is -1 for the open-ended form);
+ *   - STRING for double-quoted (strToRE) and single-quoted
+ *     (strToCaseInsensitiveRE) strings;
+ *   - RANGE for bracketed classes (ranToRE / invToRE) and for "." (mkDot);
+ *   - ID for identifiers, looked up through Symbol_find;
+ *   - CODE for a brace-delimited action, returned as a new Token.
+ *
+ * Three sub-scanners share the function: `scan` (tokens), `code` (skips to
+ * the matching closing brace, tracking nesting in depth) and `comment`
+ * (skips a possibly nested comment, tracking nesting in depth).
+ * Unterminated strings/ranges and a missing closing brace are reported
+ * through Scanner_fatal; an unexpected character is reported on stderr and
+ * skipped.
+ */
+int
+Scanner_scan(Scanner *s)
+{
+ unsigned char *cursor = s->cur;
+ unsigned int depth;
+
+/* Token mode: record where the token starts (column, line, pointer). */
+scan:
+ s->tchar = cursor - s->pos;
+ s->tline = s->cline;
+ s->tok = cursor;
+
+#line 224 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy29;
+ ++YYCURSOR;
+yy29:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '/'){
+ if(yych <= '"'){
+ if(yych <= '\n'){
+ if(yych <= '\b') goto yy53;
+ if(yych <= '\t') goto yy47;
+ goto yy49;
+ } else {
+ if(yych == ' ') goto yy47;
+ if(yych <= '!') goto yy53;
+ goto yy37;
+ }
+ } else {
+ if(yych <= '*'){
+ if(yych <= '&') goto yy53;
+ if(yych <= '\'') goto yy39;
+ if(yych <= ')') goto yy43;
+ goto yy35;
+ } else {
+ if(yych <= '+') goto yy44;
+ if(yych <= '-') goto yy53;
+ if(yych <= '.') goto yy51;
+ goto yy33;
+ }
+ }
+ } else {
+ if(yych <= '@'){
+ if(yych <= '<'){
+ if(yych == ';') goto yy43;
+ goto yy53;
+ } else {
+ if(yych <= '=') goto yy43;
+ if(yych == '?') goto yy44;
+ goto yy53;
+ }
+ } else {
+ if(yych <= '`'){
+ if(yych <= 'Z') goto yy45;
+ if(yych <= '[') goto yy41;
+ if(yych <= '\\') goto yy43;
+ goto yy53;
+ } else {
+ if(yych <= 'z') goto yy45;
+ if(yych <= '{') goto yy31;
+ if(yych <= '|') goto yy43;
+ goto yy53;
+ }
+ }
+ }
+yy31: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych <= '/') goto yy32;
+ if(yych <= '9') goto yy84;
+ goto yy32;
+yy32:
+#line 133 "scanner.re"
+{ depth = 1;
+ goto code;
+ }
+#line 291 "scanner.c"
+yy33: yych = *++YYCURSOR;
+ if(yych == '*') goto yy82;
+ goto yy34;
+yy34:
+#line 163 "scanner.re"
+{ RETURN(*s->tok); }
+#line 298 "scanner.c"
+yy35: yych = *++YYCURSOR;
+ if(yych == '/') goto yy80;
+ goto yy36;
+yy36:
+#line 165 "scanner.re"
+{ yylval.op = *s->tok;
+ RETURN(CLOSE); }
+#line 306 "scanner.c"
+yy37: yyaccept = 1;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy76;
+ goto yy38;
+yy38:
+#line 150 "scanner.re"
+{ Scanner_fatal(s, "unterminated string constant (missing \")"); }
+#line 314 "scanner.c"
+yy39: yyaccept = 2;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych != '\n') goto yy71;
+ goto yy40;
+yy40:
+#line 151 "scanner.re"
+{ Scanner_fatal(s, "unterminated string constant (missing ')"); }
+#line 322 "scanner.c"
+yy41: yyaccept = 3;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy42;
+ if(yych == '^') goto yy62;
+ goto yy60;
+yy42:
+#line 161 "scanner.re"
+{ Scanner_fatal(s, "unterminated range (missing ])"); }
+#line 331 "scanner.c"
+yy43: yych = *++YYCURSOR;
+ goto yy34;
+yy44: yych = *++YYCURSOR;
+ goto yy36;
+yy45: yych = *++YYCURSOR;
+ goto yy58;
+yy46:
+#line 180 "scanner.re"
+{ SubStr substr;
+ s->cur = cursor;
+ substr = Scanner_token(s);
+ yylval.symbol = Symbol_find(&substr);
+ return ID; }
+#line 345 "scanner.c"
+yy47: yych = *++YYCURSOR;
+ goto yy56;
+yy48:
+#line 186 "scanner.re"
+{ goto scan; }
+#line 351 "scanner.c"
+yy49: yych = *++YYCURSOR;
+ goto yy50;
+yy50:
+#line 188 "scanner.re"
+{ if(cursor == s->eof) RETURN(0);
+ s->pos = cursor; s->cline++;
+ goto scan;
+ }
+#line 360 "scanner.c"
+yy51: yych = *++YYCURSOR;
+ goto yy52;
+yy52:
+#line 193 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = mkDot();
+ return RANGE;
+ }
+#line 369 "scanner.c"
+yy53: yych = *++YYCURSOR;
+ goto yy54;
+yy54:
+#line 198 "scanner.re"
+{ fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
+ goto scan;
+ }
+#line 377 "scanner.c"
+yy55: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy56;
+yy56: if(yych == '\t') goto yy55;
+ if(yych == ' ') goto yy55;
+ goto yy48;
+yy57: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy58;
+yy58: if(yych <= '@'){
+ if(yych <= '/') goto yy46;
+ if(yych <= '9') goto yy57;
+ goto yy46;
+ } else {
+ if(yych <= 'Z') goto yy57;
+ if(yych <= '`') goto yy46;
+ if(yych <= 'z') goto yy57;
+ goto yy46;
+ }
+yy59: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy60;
+yy60: if(yych <= '['){
+ if(yych != '\n') goto yy59;
+ goto yy61;
+ } else {
+ if(yych <= '\\') goto yy64;
+ if(yych <= ']') goto yy65;
+ goto yy59;
+ }
+yy61: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy32;
+ case 1: goto yy38;
+ case 2: goto yy40;
+ case 3: goto yy42;
+ }
+yy62: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy63;
+yy63: if(yych <= '['){
+ if(yych == '\n') goto yy61;
+ goto yy62;
+ } else {
+ if(yych <= '\\') goto yy67;
+ if(yych <= ']') goto yy68;
+ goto yy62;
+ }
+yy64: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy59;
+yy65: yych = *++YYCURSOR;
+ goto yy66;
+yy66:
+#line 157 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = ranToRE(Scanner_token(s));
+ return RANGE; }
+#line 442 "scanner.c"
+yy67: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy62;
+yy68: yych = *++YYCURSOR;
+ goto yy69;
+yy69:
+#line 153 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = invToRE(Scanner_token(s));
+ return RANGE; }
+#line 455 "scanner.c"
+yy70: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy71;
+yy71: if(yych <= '&'){
+ if(yych == '\n') goto yy61;
+ goto yy70;
+ } else {
+ if(yych <= '\'') goto yy73;
+ if(yych != '\\') goto yy70;
+ goto yy72;
+ }
+yy72: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy70;
+yy73: yych = *++YYCURSOR;
+ goto yy74;
+yy74:
+#line 146 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s));
+ return STRING; }
+#line 480 "scanner.c"
+yy75: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy76;
+yy76: if(yych <= '!'){
+ if(yych == '\n') goto yy61;
+ goto yy75;
+ } else {
+ if(yych <= '"') goto yy78;
+ if(yych != '\\') goto yy75;
+ goto yy77;
+ }
+yy77: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy61;
+ goto yy75;
+yy78: yych = *++YYCURSOR;
+ goto yy79;
+yy79:
+#line 142 "scanner.re"
+{ s->cur = cursor;
+ yylval.regexp = strToRE(Scanner_token(s));
+ return STRING; }
+#line 505 "scanner.c"
+yy80: yych = *++YYCURSOR;
+ goto yy81;
+yy81:
+#line 139 "scanner.re"
+{ s->tok = cursor;
+ RETURN(0); }
+#line 512 "scanner.c"
+yy82: yych = *++YYCURSOR;
+ goto yy83;
+yy83:
+#line 136 "scanner.re"
+{ depth = 1;
+ goto comment; }
+#line 519 "scanner.c"
+yy84: ++YYCURSOR;
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ goto yy85;
+yy85: if(yych <= '/'){
+ if(yych == ',') goto yy88;
+ goto yy61;
+ } else {
+ if(yych <= '9') goto yy84;
+ if(yych != '}') goto yy61;
+ goto yy86;
+ }
+yy86: yych = *++YYCURSOR;
+ goto yy87;
+yy87:
+#line 168 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = atoi((char *)s->tok+1);
+ RETURN(CLOSESIZE); }
+#line 539 "scanner.c"
+yy88: yych = *++YYCURSOR;
+ if(yych != '}') goto yy92;
+ goto yy89;
+yy89: yych = *++YYCURSOR;
+ goto yy90;
+yy90:
+#line 176 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = -1;
+ RETURN(CLOSESIZE); }
+#line 550 "scanner.c"
+yy91: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy92;
+yy92: if(yych <= '/') goto yy61;
+ if(yych <= '9') goto yy91;
+ if(yych != '}') goto yy61;
+ goto yy93;
+yy93: yych = *++YYCURSOR;
+ goto yy94;
+yy94:
+#line 172 "scanner.re"
+{ yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1));
+ RETURN(CLOSESIZE); }
+#line 566 "scanner.c"
+}
+#line 201 "scanner.re"
+
+
+/* Code mode: skip to the closing brace that balances the opening one. */
+code:
+
+#line 573 "scanner.c"
+{
+ YYCTYPE yych;
+ unsigned int yyaccept;
+ goto yy95;
+ ++YYCURSOR;
+yy95:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= '&'){
+ if(yych <= '\n'){
+ if(yych <= '\t') goto yy103;
+ goto yy101;
+ } else {
+ if(yych == '"') goto yy105;
+ goto yy103;
+ }
+ } else {
+ if(yych <= '{'){
+ if(yych <= '\'') goto yy106;
+ if(yych <= 'z') goto yy103;
+ goto yy99;
+ } else {
+ if(yych != '}') goto yy103;
+ goto yy97;
+ }
+ }
+yy97: yych = *++YYCURSOR;
+ goto yy98;
+yy98:
+#line 205 "scanner.re"
+{ if(--depth == 0){
+ s->cur = cursor;
+ yylval.token = Token_new(Scanner_token(s), s->tline);
+ return CODE;
+ }
+ goto code; }
+#line 610 "scanner.c"
+yy99: yych = *++YYCURSOR;
+ goto yy100;
+yy100:
+#line 211 "scanner.re"
+{ ++depth;
+ goto code; }
+#line 617 "scanner.c"
+yy101: yych = *++YYCURSOR;
+ goto yy102;
+yy102:
+#line 213 "scanner.re"
+{ if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
+ s->pos = cursor; s->cline++;
+ goto code;
+ }
+#line 626 "scanner.c"
+yy103: yych = *++YYCURSOR;
+ goto yy104;
+yy104:
+#line 217 "scanner.re"
+{ goto code; }
+#line 632 "scanner.c"
+yy105: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy104;
+ goto yy112;
+yy106: yyaccept = 0;
+ yych = *(YYMARKER = ++YYCURSOR);
+ if(yych == '\n') goto yy104;
+ goto yy108;
+yy107: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy108;
+yy108: if(yych <= '&'){
+ if(yych != '\n') goto yy107;
+ goto yy109;
+ } else {
+ if(yych <= '\'') goto yy103;
+ if(yych == '\\') goto yy110;
+ goto yy107;
+ }
+yy109: YYCURSOR = YYMARKER;
+ switch(yyaccept){
+ case 0: goto yy104;
+ }
+yy110: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy109;
+ goto yy107;
+yy111: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ goto yy112;
+yy112: if(yych <= '!'){
+ if(yych == '\n') goto yy109;
+ goto yy111;
+ } else {
+ if(yych <= '"') goto yy103;
+ if(yych != '\\') goto yy111;
+ goto yy113;
+ }
+yy113: ++YYCURSOR;
+ if(YYLIMIT == YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ if(yych == '\n') goto yy109;
+ goto yy111;
+}
+#line 218 "scanner.re"
+
+
+/* Comment mode: skip a (possibly nested) comment, then resume scanning. */
+comment:
+
+#line 685 "scanner.c"
+{
+ YYCTYPE yych;
+ goto yy114;
+ ++YYCURSOR;
+yy114:
+ if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ if(yych <= ')'){
+ if(yych == '\n') goto yy119;
+ goto yy121;
+ } else {
+ if(yych <= '*') goto yy116;
+ if(yych == '/') goto yy118;
+ goto yy121;
+ }
+yy116: yych = *++YYCURSOR;
+ if(yych == '/') goto yy124;
+ goto yy117;
+yy117:
+#line 232 "scanner.re"
+{ goto comment; }
+#line 707 "scanner.c"
+yy118: yych = *++YYCURSOR;
+ if(yych == '*') goto yy122;
+ goto yy117;
+yy119: yych = *++YYCURSOR;
+ goto yy120;
+yy120:
+#line 228 "scanner.re"
+{ if(cursor == s->eof) RETURN(0);
+ s->tok = s->pos = cursor; s->cline++;
+ goto comment;
+ }
+#line 719 "scanner.c"
+yy121: yych = *++YYCURSOR;
+ goto yy117;
+yy122: yych = *++YYCURSOR;
+ goto yy123;
+yy123:
+#line 226 "scanner.re"
+{ ++depth;
+ goto comment; }
+#line 728 "scanner.c"
+yy124: yych = *++YYCURSOR;
+ goto yy125;
+yy125:
+#line 222 "scanner.re"
+{ if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+#line 737 "scanner.c"
+}
+#line 233 "scanner.re"
+
+}
+
+/*
+ * Report a fatal scanning error and terminate the program.
+ *
+ * Prints "line L, column C: msg" to stderr, where L is the line on which
+ * the current token started (s->tline) and C is the token's offset within
+ * that line converted to a 1-based column (s->tchar + 1), then exits with
+ * status 1.  This function does not return.
+ */
+void
+Scanner_fatal(Scanner *s, const char *msg)
+{
+    /* tline and tchar are unsigned int, so the matching conversion is %u. */
+    fprintf(stderr, "line %u, column %u: %s\n", s->tline, s->tchar + 1, msg);
+    exit(1);
+}
diff --git a/tools/re2c/scanner.h b/tools/re2c/scanner.h
new file mode 100644
index 0000000..a5720b7
--- /dev/null
+++ b/tools/re2c/scanner.h
@@ -0,0 +1,44 @@
+#ifndef _scanner_h
+#define _scanner_h
+
+#include <stdio.h>
+#include "tools/re2c/token.h"
+
+/* State of one input scanner; see fill() and Scanner_scan() for its use. */
+typedef struct Scanner {
+    FILE *in;               /* stream the input is read from */
+    unsigned char *bot;     /* start of the allocated buffer */
+    unsigned char *tok;     /* start of the current token */
+    unsigned char *ptr;     /* backtracking marker (used as YYMARKER) */
+    unsigned char *cur;     /* scan position saved between calls */
+    unsigned char *pos;     /* start of the current line */
+    unsigned char *lim;     /* end of the valid data (used as YYLIMIT) */
+    unsigned char *top;     /* lim + BSIZE as of the last buffer growth */
+    unsigned char *eof;     /* NULL until end of input has been read */
+    unsigned int tchar;     /* offset of the current token from pos */
+    unsigned int tline;     /* line on which the current token starts */
+    unsigned int cline;     /* current line number */
+} Scanner;
+
+void Scanner_init(Scanner*, FILE *);
+static Scanner *Scanner_new(FILE *);
+
+int Scanner_echo(Scanner*, FILE *);
+int Scanner_scan(Scanner*);
+void Scanner_fatal(Scanner*, const char*);
+static SubStr Scanner_token(Scanner*);
+static unsigned int Scanner_line(Scanner*);
+
+/*
+ * Return the current token, i.e. the bytes from s->tok up to (but not
+ * including) s->cur.  The result points into the scanner's buffer; no copy
+ * of the text is made.
+ */
+static SubStr
+Scanner_token(Scanner *s)
+{
+    SubStr token;
+    unsigned char *start = s->tok;
+
+    SubStr_init_u(&token, start, s->cur - start);
+    return token;
+}
+
+/* Report the scanner's current line number (s->cline). */
+static unsigned int
+Scanner_line(Scanner *s)
+{
+    const unsigned int current_line = s->cline;
+
+    return current_line;
+}
+
+/*
+ * Allocate a Scanner on the heap and initialise it to read from stream i.
+ *
+ * Returns the new scanner, or NULL if the allocation fails (previously a
+ * failed allocation was passed straight to Scanner_init, which would
+ * dereference the NULL pointer).
+ */
+static Scanner *
+Scanner_new(FILE *i)
+{
+    Scanner *r = malloc(sizeof *r);
+
+    if (r == NULL)
+        return NULL;
+    Scanner_init(r, i);
+    return r;
+}
+
+#endif
diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re
new file mode 100644
index 0000000..423835b
--- /dev/null
+++ b/tools/re2c/scanner.re
@@ -0,0 +1,241 @@
+#include <stdlib.h>
+#include <string.h>
+#include "tools/re2c/scanner.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parser.h"
+
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define BSIZE 8192
+
+#define YYCTYPE unsigned char
+#define YYCURSOR cursor
+#define YYLIMIT s->lim
+#define YYMARKER s->ptr
+#define YYFILL(n) {cursor = fill(s, cursor);}
+
+#define RETURN(i) {s->cur = cursor; return i;}
+
+static unsigned char *fill(Scanner*, unsigned char*);
+
+/*
+ * Put scanner s into its initial state: input comes from stream i, no
+ * buffer exists yet (every buffer pointer is NULL), the token column and
+ * token line are zero, and the current line counter starts at 1.
+ */
+void
+Scanner_init(Scanner *s, FILE *i)
+{
+    /* Line/column bookkeeping. */
+    s->cline = 1;
+    s->tline = 0;
+    s->tchar = 0;
+
+    /* No buffer has been allocated and end of input has not been seen. */
+    s->eof = NULL;
+    s->top = NULL;
+    s->lim = NULL;
+    s->pos = NULL;
+    s->cur = NULL;
+    s->ptr = NULL;
+    s->tok = NULL;
+    s->bot = NULL;
+
+    s->in = i;
+}
+
+/*
+ * Refill the scanner's buffer and return the cursor, adjusted for any
+ * movement of the buffer contents.
+ *
+ * Does nothing once s->eof has been set.  Otherwise:
+ *   1. the bytes in front of the current token (s->bot up to s->tok) are
+ *      dropped by sliding the live region (s->tok up to s->lim) down to
+ *      s->bot, and every buffer pointer is shifted by the same amount;
+ *   2. if fewer than BSIZE bytes of room remain after s->lim, a larger
+ *      buffer is allocated, the live region is copied into it and every
+ *      buffer pointer is rebased onto it;
+ *   3. up to BSIZE bytes are read from s->in.  A short read is taken as end
+ *      of input: a '\0' sentinel is stored after the data and s->eof is
+ *      left pointing one past that sentinel.
+ *
+ * The cursor is passed in and returned because callers keep it in a local
+ * variable rather than in the Scanner.  Allocation failure is reported
+ * through Scanner_fatal().
+ */
+static unsigned char *
+fill(Scanner *s, unsigned char *cursor)
+{
+    if (!s->eof) {
+        size_t cnt = s->tok - s->bot;
+
+        if (cnt) {
+            /*
+             * Source and destination overlap whenever the live region is
+             * longer than the discarded prefix, so memmove is required;
+             * memcpy on overlapping ranges is undefined behaviour.
+             */
+            memmove(s->bot, s->tok, s->lim - s->tok);
+            s->tok = s->bot;
+            s->ptr -= cnt;
+            cursor -= cnt;
+            s->pos -= cnt;
+            s->lim -= cnt;
+        }
+        if ((s->top - s->lim) < BSIZE) {
+            /* Room for the live data, one full read and the sentinel. */
+            unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE) + 1);
+
+            if (buf == NULL)
+                Scanner_fatal(s, "out of memory");
+            /* On the very first call there is no old data to copy. */
+            if (s->lim != s->tok)
+                memcpy(buf, s->tok, s->lim - s->tok);
+            s->tok = buf;
+            s->ptr = &buf[s->ptr - s->bot];
+            cursor = &buf[cursor - s->bot];
+            s->pos = &buf[s->pos - s->bot];
+            s->lim = &buf[s->lim - s->bot];
+            s->top = &s->lim[BSIZE];
+            free(s->bot);
+            s->bot = buf;
+        }
+        cnt = fread(s->lim, 1, BSIZE, s->in);
+        if (cnt != BSIZE) {
+            /* Short read: terminate the data and remember where it ends. */
+            s->eof = &s->lim[cnt];
+            *s->eof++ = '\0';
+        }
+        s->lim += cnt;
+    }
+    return cursor;
+}
+
+/*!re2c
+zero = "\000";
+any = [\000-\377];
+dot = any \ [\n];
+esc = dot \ [\\];
+istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ;
+cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ;
+dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
+sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ;
+letter = [a-zA-Z];
+digit = [0-9];
+*/
+
+/*
+ * Scanner_echo: copy input text from the scanner to `out` until the opener
+ * of a re2c block is recognised.
+ *
+ * Returns 0 immediately if end of input was already reached, and 0 again
+ * when the '\0' sentinel at s->eof is consumed.  Returns 1 after the
+ * "!re2c" comment opener has been consumed; the text in front of the
+ * opener (cursor - 7) is written out first.
+ *
+ * The "!max:re2c" comment opener prints "#define YYMAXFILL <maxFill>" and
+ * sets ignore_eoc so that the next comment terminator is dropped instead of
+ * echoed.  A newline flushes the pending text and increments both s->cline
+ * and oline.
+ *
+ * NOTE(review): the `zero` action has no goto/return on the path where
+ * cursor != s->eof, so control leaves the action without restarting the
+ * echo loop explicitly.
+ */
+int
+Scanner_echo(Scanner *s, FILE *out)
+{
+ unsigned char *cursor = s->cur;
+ int ignore_eoc = 0;
+
+ /* Catch EOF */
+ if (s->eof && cursor == s->eof)
+ return 0;
+
+ s->tok = cursor;
+echo:
+/*!re2c
+ "/*!re2c" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out);
+ s->tok = cursor;
+ RETURN(1); }
+ "/*!max:re2c" {
+ fprintf(out, "#define YYMAXFILL %u\n", maxFill);
+ s->tok = s->pos = cursor;
+ ignore_eoc = 1;
+ goto echo;
+ }
+ "*" "/" {
+ if (ignore_eoc) {
+ ignore_eoc = 0;
+ } else {
+ fwrite(s->tok, 1, cursor - s->tok, out);
+ }
+ s->tok = s->pos = cursor;
+ goto echo;
+ }
+ "\n" { fwrite(s->tok, 1, cursor - s->tok, out);
+ s->tok = s->pos = cursor; s->cline++; oline++;
+ goto echo; }
+ zero { fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */
+ if(cursor == s->eof) { RETURN(0); } }
+ any { goto echo; }
+*/
+}
+
+
+/*
+ * Scanner_scan: return the next token of a re2c specification, or 0 when
+ * the end of the re2c block (a comment terminator seen in scan mode) or the
+ * end of input is reached.
+ *
+ * Token values produced here:
+ *   - the character itself for ( ) | = ; / and backslash;
+ *   - CLOSE for * + ?, with the operator stored in yylval.op;
+ *   - CLOSESIZE for {n}, {n,m} and {n,}, with the bounds in yylval.extop
+ *     (maxsize is -1 for the open-ended form);
+ *   - STRING for double-quoted (strToRE) and single-quoted
+ *     (strToCaseInsensitiveRE) strings;
+ *   - RANGE for bracketed classes (ranToRE / invToRE) and for "." (mkDot);
+ *   - ID for identifiers, looked up through Symbol_find;
+ *   - CODE for a brace-delimited action, returned as a new Token.
+ *
+ * Three rule sets share the function: `scan` (tokens), `code` (skips to the
+ * matching closing brace, tracking nesting in depth) and `comment` (skips a
+ * possibly nested comment, tracking nesting in depth).  Unterminated
+ * strings/ranges and a missing closing brace are reported through
+ * Scanner_fatal; an unexpected character is reported on stderr and skipped.
+ */
+int
+Scanner_scan(Scanner *s)
+{
+ unsigned char *cursor = s->cur;
+ unsigned int depth;
+
+scan:
+ s->tchar = cursor - s->pos;
+ s->tline = s->cline;
+ s->tok = cursor;
+/*!re2c
+ "{" { depth = 1;
+ goto code;
+ }
+ "/*" { depth = 1;
+ goto comment; }
+
+ "*/" { s->tok = cursor;
+ RETURN(0); }
+
+ dstring { s->cur = cursor;
+ yylval.regexp = strToRE(Scanner_token(s));
+ return STRING; }
+
+ sstring { s->cur = cursor;
+ yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s));
+ return STRING; }
+
+ "\"" { Scanner_fatal(s, "unterminated string constant (missing \")"); }
+ "'" { Scanner_fatal(s, "unterminated string constant (missing ')"); }
+
+ istring { s->cur = cursor;
+ yylval.regexp = invToRE(Scanner_token(s));
+ return RANGE; }
+
+ cstring { s->cur = cursor;
+ yylval.regexp = ranToRE(Scanner_token(s));
+ return RANGE; }
+
+ "[" { Scanner_fatal(s, "unterminated range (missing ])"); }
+
+ [()|=;/\\] { RETURN(*s->tok); }
+
+ [*+?] { yylval.op = *s->tok;
+ RETURN(CLOSE); }
+
+ "{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = atoi((char *)s->tok+1);
+ RETURN(CLOSESIZE); }
+
+ "{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1));
+ RETURN(CLOSESIZE); }
+
+ "{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)s->tok+1);
+ yylval.extop.maxsize = -1;
+ RETURN(CLOSESIZE); }
+
+ letter (letter|digit)* { SubStr substr;
+ s->cur = cursor;
+ substr = Scanner_token(s);
+ yylval.symbol = Symbol_find(&substr);
+ return ID; }
+
+ [ \t]+ { goto scan; }
+
+ "\n" { if(cursor == s->eof) RETURN(0);
+ s->pos = cursor; s->cline++;
+ goto scan;
+ }
+
+ "." { s->cur = cursor;
+ yylval.regexp = mkDot();
+ return RANGE;
+ }
+
+ any { fprintf(stderr, "unexpected character: '%c'\n", *s->tok);
+ goto scan;
+ }
+*/
+
+code:
+/*!re2c
+ "}" { if(--depth == 0){
+ s->cur = cursor;
+ yylval.token = Token_new(Scanner_token(s), s->tline);
+ return CODE;
+ }
+ goto code; }
+ "{" { ++depth;
+ goto code; }
+ "\n" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'");
+ s->pos = cursor; s->cline++;
+ goto code;
+ }
+ dstring | sstring | any { goto code; }
+*/
+
+comment:
+/*!re2c
+ "*/" { if(--depth == 0)
+ goto scan;
+ else
+ goto comment; }
+ "/*" { ++depth;
+ goto comment; }
+ "\n" { if(cursor == s->eof) RETURN(0);
+ s->tok = s->pos = cursor; s->cline++;
+ goto comment;
+ }
+ any { goto comment; }
+*/
+}
+
+/*
+ * Report a fatal scanning error and terminate the program.
+ *
+ * Prints "line L, column C: msg" to stderr, where L is the line on which
+ * the current token started (s->tline) and C is the token's offset within
+ * that line converted to a 1-based column (s->tchar + 1), then exits with
+ * status 1.  This function does not return.
+ */
+void
+Scanner_fatal(Scanner *s, const char *msg)
+{
+    /* tline and tchar are unsigned int, so the matching conversion is %u. */
+    fprintf(stderr, "line %u, column %u: %s\n", s->tline, s->tchar + 1, msg);
+    exit(1);
+}
diff --git a/tools/re2c/substr.c b/tools/re2c/substr.c
new file mode 100644
index 0000000..c750fb9
--- /dev/null
+++ b/tools/re2c/substr.c
@@ -0,0 +1,65 @@
+#include <string.h>
+#include "tools/re2c/substr.h"
+#include "tools/re2c/globals.h"
+
+/*
+ * Write the s->len bytes of s to stream o, and add one to oline for every
+ * newline character contained in s.
+ */
+void
+SubStr_out(const SubStr *s, FILE *o)
+{
+    const char *p;
+    unsigned int remaining;
+
+    fwrite(s->str, s->len, 1, o);
+
+    /* Keep the output line counter in step with what was just written. */
+    for (p = s->str, remaining = s->len; remaining > 0; remaining--, p++) {
+        if (*p == '\n')
+            oline++;
+    }
+}
+
+/*
+ * Compare two substrings byte for byte.
+ * Returns 1 when both have the same length and the same contents, else 0.
+ */
+int
+SubStr_eq(const SubStr *s1, const SubStr *s2)
+{
+    if (s1->len != s2->len)
+        return 0;
+    return memcmp(s1->str, s2->str, s1->len) == 0;
+}
+
+/*
+ * Initialise r as an independent copy of s: a fresh buffer of s->len bytes
+ * is allocated and the bytes of s are copied into it.
+ * NOTE: the result of malloc is not checked.
+ */
+void
+Str_init(Str *r, const SubStr* s)
+{
+    char *buffer = malloc(sizeof(char)*s->len);
+
+    SubStr_init(r, buffer, s->len);
+    memcpy(r->str, s->str, s->len);
+}
+
+/*
+ * Allocate a new Str holding an independent copy of the bytes of s and
+ * return it.
+ * NOTE: the result of malloc is not checked.
+ */
+Str *
+Str_new(const SubStr* s)
+{
+    char *buffer = malloc(sizeof(char)*s->len);
+    Str *copy = SubStr_new(buffer, s->len);
+
+    memcpy(copy->str, s->str, s->len);
+    return copy;
+}
+
+/*
+ * Transfer the contents of s into r: r takes over s's buffer pointer and
+ * length, and s is left empty (NULL pointer, length 0).  No bytes are
+ * copied.
+ */
+void
+Str_copy(Str *r, Str* s)
+{
+    char *taken_str = s->str;
+    unsigned int taken_len = s->len;
+
+    SubStr_init(r, taken_str, taken_len);
+    s->len = 0;
+    s->str = NULL;
+}
+
+/*
+ * Allocate a new Str that takes over s's buffer pointer and length; s is
+ * left empty (NULL pointer, length 0).  No bytes are copied.
+ */
+Str *
+Str_new_copy(Str* s)
+{
+    Str *taken = SubStr_new(s->str, s->len);
+
+    s->len = 0;
+    s->str = NULL;
+    return taken;
+}
+
+/* Allocate a new, empty Str (NULL buffer pointer, length 0). */
+Str *
+Str_new_empty(void)
+{
+    Str *empty = SubStr_new(NULL, 0);
+
+    return empty;
+}
+
+
+/*
+ * Free a heap-allocated Str together with the buffer it points to.
+ * The fields are overwritten with all-ones values before the structure
+ * itself is released.
+ */
+void
+Str_delete(Str *s)
+{
+    free(s->str);
+
+    /* Poison the fields before giving the structure back. */
+    s->len = (unsigned int)-1;
+    s->str = (char*)-1;
+
+    free(s);
+}
diff --git a/tools/re2c/substr.h b/tools/re2c/substr.h
new file mode 100644
index 0000000..0a19b93
--- /dev/null
+++ b/tools/re2c/substr.h
@@ -0,0 +1,89 @@
+#ifndef re2c_substr_h
+#define re2c_substr_h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "tools/re2c/basics.h"
+
+/* A counted string: a pointer to the first byte plus a byte count. */
+typedef struct SubStr {
+    char *str;          /* first byte of the text */
+    unsigned int len;   /* number of bytes */
+} SubStr;
+
+int SubStr_eq(const SubStr *, const SubStr *);
+
+static void SubStr_init_u(SubStr*, unsigned char*, unsigned int);
+static SubStr *SubStr_new_u(unsigned char*, unsigned int);
+
+static void SubStr_init(SubStr*, char*, unsigned int);
+static SubStr *SubStr_new(char*, unsigned int);
+
+static void SubStr_copy(SubStr*, const SubStr*);
+static SubStr *SubStr_new_copy(const SubStr*);
+
+void SubStr_out(const SubStr*, FILE *);
+#define SubStr_delete(x) free(x)
+
+typedef struct SubStr Str;
+
+void Str_init(Str*, const SubStr*);
+Str *Str_new(const SubStr*);
+
+void Str_copy(Str*, Str*);
+Str *Str_new_copy(Str*);
+
+Str *Str_new_empty(void);
+void Str_destroy(Str *);
+void Str_delete(Str *);
+
+/*
+ * Initialise r to refer to the l bytes starting at s, where s is given as
+ * an unsigned char pointer.  The bytes are not copied.
+ */
+static void
+SubStr_init_u(SubStr *r, unsigned char *s, unsigned int l)
+{
+    SubStr_init(r, (char*)s, l);
+}
+
+/*
+ * Allocate a SubStr that refers to the l bytes starting at s, where s is
+ * given as an unsigned char pointer.  The bytes are not copied.
+ * NOTE: the result of malloc is not checked.
+ */
+static SubStr *
+SubStr_new_u(unsigned char *s, unsigned int l)
+{
+    SubStr *view = malloc(sizeof *view);
+
+    SubStr_init_u(view, s, l);
+    return view;
+}
+
+/*
+ * Initialise r to refer to the l bytes starting at s.  The bytes are not
+ * copied.
+ */
+static void
+SubStr_init(SubStr *r, char *s, unsigned int l)
+{
+    r->len = l;
+    r->str = s;
+}
+
+/*
+ * Allocate a SubStr that refers to the l bytes starting at s.  The bytes
+ * are not copied.
+ * NOTE: the result of malloc is not checked.
+ */
+static SubStr *
+SubStr_new(char *s, unsigned int l)
+{
+    SubStr *view = malloc(sizeof *view);
+
+    SubStr_init(view, s, l);
+    return view;
+}
+
+/*
+ * Make r refer to the same bytes as s (same pointer, same length).  The
+ * bytes themselves are not copied.
+ */
+static void
+SubStr_copy(SubStr *r, const SubStr *s)
+{
+    *r = *s;
+}
+
+/*
+ * Allocate a SubStr that refers to the same bytes as s (same pointer, same
+ * length).  The bytes themselves are not copied.
+ * NOTE: the result of malloc is not checked.
+ */
+static SubStr *
+SubStr_new_copy(const SubStr *s)
+{
+    SubStr *view = malloc(sizeof *view);
+
+    SubStr_copy(view, s);
+    return view;
+}
+
+#endif
diff --git a/tools/re2c/token.h b/tools/re2c/token.h
new file mode 100644
index 0000000..d50a46d
--- /dev/null
+++ b/tools/re2c/token.h
@@ -0,0 +1,30 @@
+#ifndef re2c_token_h
+#define re2c_token_h
+
+#include "substr.h"
+
+/* A piece of source text together with a line number. */
+typedef struct Token {
+ /* The token's text. */
+ Str text;
+ /* Line number supplied by whoever created the token. */
+ unsigned int line;
+} Token;
+
+static void Token_init(Token *, SubStr, unsigned int);
+static Token *Token_new(SubStr, unsigned int);
+
+/*
+ * Initialise token r with text t and line number l.
+ *
+ * The text is handed over with Str_copy, so r->text ends up holding the
+ * same buffer pointer as t; the bytes are not duplicated.
+ * NOTE(review): Token_new duplicates the bytes with Str_init instead;
+ * confirm that this difference is intended.
+ */
+static void
+Token_init(Token *r, SubStr t, unsigned int l)
+{
+    r->line = l;
+    Str_copy(&r->text, &t);
+}
+
+/*
+ * Allocate a Token whose text is an independent copy of t (made with
+ * Str_init) and whose line number is l.
+ * NOTE: the result of malloc is not checked.
+ */
+static Token *
+Token_new(SubStr t, unsigned int l)
+{
+    Token *token = malloc(sizeof *token);
+
+    token->line = l;
+    Str_init(&token->text, &t);
+    return token;
+}
+
+#endif
diff --git a/tools/re2c/translate.c b/tools/re2c/translate.c
new file mode 100644
index 0000000..7ba173e
--- /dev/null
+++ b/tools/re2c/translate.c
@@ -0,0 +1,61 @@
+#include "tools/re2c/globals.h"
+
+/* Identity translation table: entry i holds the value i (0x00..0xff). */
+unsigned char asc2asc[256] = {
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
+0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
+0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
+
+/*
+ * Currently selected translation tables; both start out as the identity
+ * table.  NOTE(review): presumably xlat is the forward mapping and talx its
+ * inverse -- confirm against the code that switches them.
+ */
+unsigned char *xlat = asc2asc;
+unsigned char *talx = asc2asc;
+
+/*
+ * ASCII-to-EBCDIC translation table, indexed by the ISO 8859/1 code of a
+ * character and yielding its Code Page 37 code (e.g. 0x41 'A' -> 0xc1).
+ */
+unsigned char asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */
+0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f,
+0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61,
+0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f,
+0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
+0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xba,0xe0,0xbb,0xb0,0x6d,
+0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96,
+0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07,
+0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b,
+0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff,
+0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbd,0xb4,0x9a,0x8a,0x5f,0xca,0xaf,0xbc,
+0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab,
+0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77,
+0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xad,0x8e,0x59,
+0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57,
+0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0xae,0xdf
+};
+
+/*
+ * EBCDIC-to-ASCII translation table, indexed by the Code Page 37 code of a
+ * character and yielding its ISO 8859/1 code (e.g. 0xc1 -> 0x41 'A').
+ */
+unsigned char ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */
+0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
+0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
+0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
+0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
+0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
+0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0xac,
+0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
+0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
+0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xde,0xb1,
+0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
+0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0xdd,0xfe,0xae,
+0x5e,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0x5b,0x5d,0xaf,0xa8,0xb4,0xd7,
+0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
+0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
+0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f
+};