diff options
Diffstat (limited to 'tools')
74 files changed, 14607 insertions, 0 deletions
diff --git a/tools/Makefile.inc b/tools/Makefile.inc new file mode 100644 index 0000000..dbc67fc --- /dev/null +++ b/tools/Makefile.inc @@ -0,0 +1,9 @@ +EXTRA_DIST += tools/re2c/Makefile.inc +EXTRA_DIST += tools/genmacro/Makefile.inc +EXTRA_DIST += tools/genperf/Makefile.inc +EXTRA_DIST += tools/python-yasm/Makefile.inc + +include tools/re2c/Makefile.inc +include tools/genmacro/Makefile.inc +include tools/genperf/Makefile.inc +include tools/python-yasm/Makefile.inc diff --git a/tools/genmacro/Makefile.inc b/tools/genmacro/Makefile.inc new file mode 100644 index 0000000..722f95d --- /dev/null +++ b/tools/genmacro/Makefile.inc @@ -0,0 +1,14 @@ +# These utility programs have to be built for BUILD host in cross-build. +# This makes things rather non-standard automake + +noinst_PROGRAMS += genmacro + +genmacro_SOURCES = +EXTRA_DIST += tools/genmacro/genmacro.c +genmacro_LDADD = genmacro.$(OBJEXT) +genmacro_LINK = $(CCLD_FOR_BUILD) -o $@ + +genmacro.$(OBJEXT): tools/genmacro/genmacro.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/genmacro/genmacro.c || echo '$(srcdir)/'`tools/genmacro/genmacro.c + diff --git a/tools/genmacro/genmacro.c b/tools/genmacro/genmacro.c new file mode 100644 index 0000000..8e702b8 --- /dev/null +++ b/tools/genmacro/genmacro.c @@ -0,0 +1,134 @@ +/* + * + * C version of NASM's macros.pl + * + * Copyright (C) 2004-2008 Peter Johnson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MAXLINE 1024 + +int +main(int argc, char *argv[]) +{ + FILE *in, *out; + int i; + char *str; + char *strp; + char *charp; + int fline; + int line = 0; + int lindex = 0; + size_t len; + + if (argc < 4) { + fprintf(stderr, "Usage: %s <out> <var> <file> [<file> ...]\n", argv[0]); + return EXIT_FAILURE; + } + + out = fopen(argv[1], "wt"); + + if (!out) { + fprintf(stderr, "Could not open `%s'.\n", argv[1]); + return EXIT_FAILURE; + } + + str = malloc(MAXLINE); + + fprintf(out, "/* This file auto-generated from standard.mac by genmacro.c" + " - don't edit it */\n\n#include <stddef.h>\n\n" + "static const char *%s[] = {\n", argv[2]); + + for (i=3; i<argc; i++) { + in = fopen(argv[i], "rt"); + if (!in) { + fprintf(stderr, "Could not open `%s'.\n", argv[i]); + fclose(out); + remove(argv[1]); + return EXIT_FAILURE; + } + + fline = 0; + + while (fgets(str, MAXLINE, in)) { + line++; + fline++; + + strp = str; + + /* check for unterminated quotes and delete comments */ + charp = strp; + while ((charp = strpbrk(charp, "'\";"))) { + if (charp[0] == ';') { + *charp = '\0'; + break; + } + if ((charp = strchr(charp+1, 
charp[0])) == NULL) { + fprintf(stderr, "%s:%d: error: unterminated quote\n", + argv[i], fline); + fclose(out); + remove(argv[1]); + return EXIT_FAILURE; + } + charp++; + } + + /* strip off leading and trailing whitespace */ + while (*strp == ' ' || *strp == '\t') + strp++; + len = strlen(strp); + while (len > 0 && (strp[len-1] == ' ' || strp[len-1] == '\t' || + strp[len-1] == '\n')) { + strp[len-1] = '\0'; + len--; + } + + /* skip blank lines */ + if (len == 0) + continue; + + /* output as string to output file */ + fprintf(out, " \""); + while (*strp != '\0') { + if (*strp == '\\' || *strp == '"') + fputc('\\', out); + fputc(*strp, out); + strp++; + } + fprintf(out, "\",\n"); + lindex++; + } + + fclose(in); + } + + fprintf(out, " NULL\n};\n"); + fclose(out); + + free(str); + + return EXIT_SUCCESS; +} diff --git a/tools/genperf/Makefile.inc b/tools/genperf/Makefile.inc new file mode 100644 index 0000000..135da6b --- /dev/null +++ b/tools/genperf/Makefile.inc @@ -0,0 +1,42 @@ +# These utility programs have to be built for BUILD host in cross-build. 
+# This makes things rather non-standard automake + +noinst_PROGRAMS += genperf + +# Suffix rule for genperf +SUFFIXES += .gperf +.gperf.c: genperf$(EXEEXT) + $(top_builddir)/genperf$(EXEEXT) $< $@ + +genperf_SOURCES = +EXTRA_DIST += tools/genperf/genperf.c +EXTRA_DIST += tools/genperf/perfect.c +EXTRA_DIST += tools/genperf/perfect.h +EXTRA_DIST += tools/genperf/standard.h +genperf_LDADD = genperf.$(OBJEXT) +genperf_LDADD += gp-perfect.$(OBJEXT) +genperf_LDADD += gp-phash.$(OBJEXT) +genperf_LDADD += gp-xmalloc.$(OBJEXT) +genperf_LDADD += gp-xstrdup.$(OBJEXT) +genperf_LINK = $(CCLD_FOR_BUILD) -o $@ + +genperf.$(OBJEXT): tools/genperf/genperf.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/genperf/genperf.c || echo '$(srcdir)/'`tools/genperf/genperf.c + +gp-perfect.$(OBJEXT): tools/genperf/perfect.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/genperf/perfect.c || echo '$(srcdir)/'`tools/genperf/perfect.c + +gp-phash.$(OBJEXT): libyasm/phash.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f libyasm/phash.c || echo '$(srcdir)/'`libyasm/phash.c + +gp-xmalloc.$(OBJEXT): libyasm/xmalloc.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f libyasm/xmalloc.c || echo '$(srcdir)/'`libyasm/xmalloc.c + +gp-xstrdup.$(OBJEXT): libyasm/xstrdup.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f libyasm/xstrdup.c || echo '$(srcdir)/'`libyasm/xstrdup.c + diff --git a/tools/genperf/genperf.c b/tools/genperf/genperf.c new file mode 100644 index 0000000..c3cfa76 --- /dev/null +++ b/tools/genperf/genperf.c @@ -0,0 +1,540 @@ +/* + * + * Generate Minimal Perfect Hash (genperf) + * + * Copyright (C) 2006-2007 Peter Johnson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following 
conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdio.h> +#include <ctype.h> +#include <stdarg.h> +#include <string.h> +#include "tools/genperf/perfect.h" +#include "libyasm/compat-queue.h" +#include "libyasm/coretype.h" +#include "libyasm/errwarn.h" + +typedef STAILQ_HEAD(slist, sval) slist; +typedef struct sval { + STAILQ_ENTRY(sval) link; + char *str; +} sval; + +typedef STAILQ_HEAD(keyword_list, keyword) keyword_list; +typedef struct keyword { + STAILQ_ENTRY(keyword) link; + char *name; + char *args; + unsigned int line; +} keyword; + +static unsigned int cur_line = 1; +static int errors = 0; + +static void +report_error(const char *fmt, ...) 
+{ + va_list ap; + + fprintf(stderr, "%u: ", cur_line); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + errors++; +} + +void +yasm__fatal(const char *message, ...) +{ + abort(); +} + +/* make the c output for the perfect hash tab array */ +static void +make_c_tab( + FILE *f, + bstuff *tab, /* table indexed by b */ + ub4 smax, /* range of scramble[] */ + ub4 blen, /* b in 0..blen-1, power of 2 */ + ub4 *scramble) /* used in final hash */ +{ + ub4 i; + /* table for the mapping for the perfect hash */ + if (blen >= USE_SCRAMBLE) { + /* A way to make the 1-byte values in tab bigger */ + if (smax > UB2MAXVAL+1) { + fprintf(f, " static const unsigned long scramble[] = {\n"); + for (i=0; i<=UB1MAXVAL; i+=4) + fprintf(f, " 0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n", + scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]); + } else { + fprintf(f, " static const unsigned short scramble[] = {\n"); + for (i=0; i<=UB1MAXVAL; i+=8) + fprintf(f, +" 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n", + scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3], + scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]); + } + fprintf(f, " };\n"); + fprintf(f, "\n"); + } + + if (blen > 0) { + /* small adjustments to _a_ to make values distinct */ + if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE) + fprintf(f, " static const unsigned char "); + else + fprintf(f, " static const unsigned short "); + fprintf(f, "tab[] = {\n"); + + if (blen < 16) { + for (i=0; i<blen; ++i) + fprintf(f, "%3ld,", scramble[tab[i].val_b]); + } else if (blen <= 1024) { + for (i=0; i<blen; i+=16) + fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", + scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], + scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], + scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], + scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], + scramble[tab[i+8].val_b], 
scramble[tab[i+9].val_b], + scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], + scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], + scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); + } else if (blen < USE_SCRAMBLE) { + for (i=0; i<blen; i+=8) + fprintf(f, " %ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", + scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], + scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], + scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], + scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); + } else { + for (i=0; i<blen; i+=16) + fprintf(f, " %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n", + tab[i+0].val_b, tab[i+1].val_b, + tab[i+2].val_b, tab[i+3].val_b, + tab[i+4].val_b, tab[i+5].val_b, + tab[i+6].val_b, tab[i+7].val_b, + tab[i+8].val_b, tab[i+9].val_b, + tab[i+10].val_b, tab[i+11].val_b, + tab[i+12].val_b, tab[i+13].val_b, + tab[i+14].val_b, tab[i+15].val_b); + } + fprintf(f, " };\n"); + fprintf(f, "\n"); + } +} + +static void +perfect_gen(FILE *out, const char *lookup_function_name, + const char *struct_name, keyword_list *kws, + const char *filename) +{ + ub4 nkeys; + key *keys; + hashform form; + bstuff *tab; /* table indexed by b */ + hstuff *tabh; /* table indexed by hash value */ + ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ + ub4 alen; /* a in 0..alen-1, a power of 2 */ + ub4 blen; /* b in 0..blen-1, a power of 2 */ + ub4 salt; /* a parameter to the hash function */ + gencode final; /* code for final hash */ + ub4 i; + ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ + char buf[10][80]; /* buffer for generated code */ + char *buf2[10]; /* also for generated code */ + keyword *kw; + + /* perfect hash configuration */ + form.mode = NORMAL_HM; + form.hashtype = STRING_HT; + form.perfect = MINIMAL_HP; + form.speed = SLOW_HS; + + /* set up code for final hash */ + final.line = buf2; + final.used = 0; + final.len = 10; + for (i=0; i<10; i++) + final.line[i] = buf[i]; + + /* build list of keys */ 
+ nkeys = 0; + keys = NULL; + STAILQ_FOREACH(kw, kws, link) { + key *k = yasm_xmalloc(sizeof(key)); + + k->name_k = yasm__xstrdup(kw->name); + k->len_k = (ub4)strlen(kw->name); + k->next_k = keys; + keys = k; + nkeys++; + } + + /* find the hash */ + findhash(&tab, &tabh, &alen, &blen, &salt, &final, + scramble, &smax, keys, nkeys, &form); + + /* The hash function beginning */ + fprintf(out, "static const struct %s *\n", struct_name); + fprintf(out, "%s(const char *key, size_t len)\n", lookup_function_name); + fprintf(out, "{\n"); + + /* output the dir table: this should loop up to smax for NORMAL_HP, + * or up to pakd.nkeys for MINIMAL_HP. + */ + fprintf(out, " static const struct %s pd[%lu] = {\n", struct_name, nkeys); + for (i=0; i<nkeys; i++) { + if (tabh[i].key_h) { + STAILQ_FOREACH(kw, kws, link) { + if (strcmp(kw->name, tabh[i].key_h->name_k) == 0) + break; + } + if (!kw) { + report_error("internal error: could not find `%s'", + tabh[i].key_h->name_k); + break; + } + fprintf(out, "#line %u \"%s\"\n", kw->line, filename); + fprintf(out, " {\"%s\"%s}", kw->name, kw->args); + } else + fprintf(out, " { NULL }"); + + if (i < nkeys-1) + fprintf(out, ","); + fprintf(out, "\n"); + } + fprintf(out, " };\n"); + + /* output the hash tab[] array */ + make_c_tab(out, tab, smax, blen, scramble); + + /* The hash function body */ + fprintf(out, " const struct %s *ret;\n", struct_name); + for (i=0; i<final.used; ++i) + fprintf(out, "%s", final.line[i]); + fprintf(out, " if (rsl >= %lu) return NULL;\n", nkeys); + fprintf(out, " ret = &pd[rsl];\n"); + fprintf(out, " if (strcmp(key, ret->name) != 0) return NULL;\n"); + fprintf(out, " return ret;\n"); + fprintf(out, "}\n"); + fprintf(out, "\n"); + + free(tab); + free(tabh); +} + +int +main(int argc, char *argv[]) +{ + FILE *in, *out; + size_t i; + char *ch; + static char line[1024], tmp[1024]; + static char struct_name[128] = ""; + static char lookup_function_name[128] = "in_word_set"; + static char language[16] = ""; + static 
char delimiters[16] = ",\r\n"; + static char name[128]; + static char filename[768]; + int need_struct = 0; + int have_struct = 0; + int go_keywords = 0; + int ignore_case = 0; + int compare_strncmp = 0; + int readonly_tables = 0; + slist usercode, usercode2; + keyword_list keywords; + sval *sv; + keyword *kw; + + if (argc != 3) { + fprintf(stderr, "Usage: genperf <in> <out>\n"); + return EXIT_FAILURE; + } + + in = fopen(argv[1], "rt"); + if (!in) { + fprintf(stderr, "Could not open `%s' for reading\n", argv[1]); + return EXIT_FAILURE; + } + + ch = argv[1]; + i = 0; + while (*ch && i < 767) { + if (*ch == '\\') { + filename[i++] = '/'; + ch++; + } else + filename[i++] = *ch++; + } + filename[i] = '\0'; + + STAILQ_INIT(&usercode); + STAILQ_INIT(&usercode2); + STAILQ_INIT(&keywords); + + /* Parse declarations section */ + while (fgets(line, 1024, in)) { + /* Comments start with # as the first thing on a line */ + if (line[0] == '#') { + cur_line++; + continue; + } + + /* Handle structure declaration */ + if (strncmp(line, "struct", 6) == 0) { + int braces; + + if (!need_struct) { + report_error("struct without %%struct-type declaration"); + return EXIT_FAILURE; + } + if (have_struct) { + report_error("more than one struct declaration"); + return EXIT_FAILURE; + } + have_struct = 1; + + /* copy struct name */ + ch = &line[6]; + while (isspace(*ch)) + ch++; + i = 0; + while ((isalnum(*ch) || *ch == '_') && i < 127) + struct_name[i++] = *ch++; + if (i == 127) { + report_error("struct name too long"); + return EXIT_FAILURE; + } + struct_name[i] = '\0'; + + sv = yasm_xmalloc(sizeof(sval)); + sprintf(tmp, "#line %u \"%s\"\n", cur_line, filename); + sv->str = yasm__xstrdup(tmp); + STAILQ_INSERT_TAIL(&usercode, sv, link); + + braces = 0; + do { + /* count braces to determine when we're done */ + ch = line; + while (*ch != '\0') { + if (*ch == '{') + braces++; + if (*ch == '}') + braces--; + ch++; + } + sv = yasm_xmalloc(sizeof(sval)); + sv->str = yasm__xstrdup(line); + 
STAILQ_INSERT_TAIL(&usercode, sv, link); + cur_line++; + if (braces <= 0) + break; + } while (fgets(line, 1024, in)); + cur_line++; + continue; + } + + /* Ignore non-declaration lines */ + if (line[0] != '%') { + cur_line++; + continue; + } + + /* %% terminates declarations section */ + if (line[1] == '%') { + if (need_struct && !have_struct) { + report_error("%%struct-type declaration, but no struct found"); + return EXIT_FAILURE; + } + go_keywords = 1; + break; /* move on to keywords section */ + } + + /* %{ begins a verbatim code section that ends with %} */ + if (line[1] == '{') { + sv = yasm_xmalloc(sizeof(sval)); + sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); + sv->str = yasm__xstrdup(tmp); + STAILQ_INSERT_TAIL(&usercode, sv, link); + + while (fgets(line, 1024, in)) { + cur_line++; + if (line[0] == '%' && line[1] == '}') + break; + sv = yasm_xmalloc(sizeof(sval)); + sv->str = yasm__xstrdup(line); + STAILQ_INSERT_TAIL(&usercode, sv, link); + } + cur_line++; + continue; + } + + if (strncmp(&line[1], "ignore-case", 11) == 0) { + ignore_case = 1; + } else if (strncmp(&line[1], "compare-strncmp", 15) == 0) { + compare_strncmp = 1; + } else if (strncmp(&line[1], "readonly-tables", 15) == 0) { + readonly_tables = 1; + } else if (strncmp(&line[1], "language=", 9) == 0) { + ch = &line[10]; + i = 0; + while (*ch != '\n' && i<15) + language[i++] = *ch++; + language[i] = '\0'; + } else if (strncmp(&line[1], "delimiters=", 11) == 0) { + ch = &line[12]; + i = 0; + while (i<15) + delimiters[i++] = *ch++; + delimiters[i] = '\0'; + } else if (strncmp(&line[1], "enum", 4) == 0) { + /* unused */ + } else if (strncmp(&line[1], "struct-type", 11) == 0) { + need_struct = 1; + } else if (strncmp(&line[1], "define", 6) == 0) { + /* Several different defines we need to handle */ + ch = &line[7]; + while (isspace(*ch)) + ch++; + + if (strncmp(ch, "hash-function-name", 18) == 0) { + /* unused */ + } else if (strncmp(ch, "lookup-function-name", 20) == 0) { + ch = 
&line[7+20+1]; + while (isspace(*ch)) + ch++; + i = 0; + while ((isalnum(*ch) || *ch == '_') && i < 127) + lookup_function_name[i++] = *ch++; + if (i == 127) { + report_error("struct name too long"); + return EXIT_FAILURE; + } + lookup_function_name[i] = '\0'; + } else { + fprintf(stderr, "%u: unrecognized define `%s'\n", cur_line, + line); + } + } else { + fprintf(stderr, "%u: unrecognized declaration `%s'\n", cur_line, + line); + } + + cur_line++; + } + + if (!go_keywords) { + report_error("no keywords section found"); + return EXIT_FAILURE; + } + + /* Parse keywords section */ + while (fgets(line, 1024, in)) { + char *d; + + /* Comments start with # as the first thing on a line */ + if (line[0] == '#') { + cur_line++; + continue; + } + + /* Keywords section terminated with %% */ + if (line[0] == '%' && line[1] == '%') + break; + + /* Look for name */ + ch = &line[0]; + i = 0; + while (strchr(delimiters, *ch) == NULL && i < 127) + name[i++] = *ch++; + if (i == 127) { + report_error("keyword name too long"); + return EXIT_FAILURE; + } + name[i] = '\0'; + + /* Strip EOL */ + d = strrchr(ch, '\n'); + if (d) + *d = '\0'; + d = strrchr(ch, '\r'); + if (d) + *d = '\0'; + kw = yasm_xmalloc(sizeof(keyword)); + kw->name = yasm__xstrdup(name); + kw->args = yasm__xstrdup(ch); + kw->line = cur_line; + STAILQ_INSERT_TAIL(&keywords, kw, link); + cur_line++; + } + + if (errors > 0) + return EXIT_FAILURE; + + /* Pull in any end code */ + if (!feof(in)) { + sv = yasm_xmalloc(sizeof(sval)); + sprintf(tmp, "#line %u \"%s\"\n\n", cur_line, filename); + sv->str = yasm__xstrdup(tmp); + STAILQ_INSERT_TAIL(&usercode2, sv, link); + + while (fgets(line, 1024, in)) { + sv = yasm_xmalloc(sizeof(sval)); + sv->str = yasm__xstrdup(line); + STAILQ_INSERT_TAIL(&usercode2, sv, link); + } + } + + /* output code */ + out = fopen(argv[2], "wt"); + if (!out) { + fprintf(stderr, "Could not open `%s' for writing\n", argv[2]); + return EXIT_FAILURE; + } + + fprintf(out, "/* %s code produced by genperf 
*/\n", language); + fprintf(out, "/* Command-line: genperf %s %s */\n", argv[1], argv[2]); + + STAILQ_FOREACH(sv, &usercode, link) + fprintf(out, "%s", sv->str); + + /* Get perfect hash */ + perfect_gen(out, lookup_function_name, struct_name, &keywords, filename); + + STAILQ_FOREACH(sv, &usercode2, link) + fprintf(out, "%s", sv->str); + + fclose(out); + + if (errors > 0) { + remove(argv[2]); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + diff --git a/tools/genperf/perfect.c b/tools/genperf/perfect.c new file mode 100644 index 0000000..7cd6867 --- /dev/null +++ b/tools/genperf/perfect.c @@ -0,0 +1,1178 @@ +/* Modified for use with yasm by Peter Johnson. */ +/* +------------------------------------------------------------------------------ +perfect.c: code to generate code for a hash for perfect hashing. +(c) Bob Jenkins, September 1996, December 1999 +You may use this code in any way you wish, and it is free. No warranty. +I hereby place this in the public domain. +Source is http://burtleburtle.net/bob/c/perfect.c + +This generates a minimal perfect hash function. That means, given a +set of n keys, this determines a hash function that maps each of +those keys into a value in 0..n-1 with no collisions. + +The perfect hash function first uses a normal hash function on the key +to determine (a,b) such that the pair (a,b) is distinct for all +keys, then it computes a^scramble[tab[b]] to get the final perfect hash. +tab[] is an array of 1-byte values and scramble[] is a 256-term array of +2-byte or 4-byte values. If there are n keys, the length of tab[] is a +power of two between n/3 and n. + +I found the idea of computing distinct (a,b) values in "Practical minimal +perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, +Communications of the ACM, January 1992. They found the idea in Chichelli +(CACM Jan 1980). Beyond that, our methods differ. + +The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in +0..*blen*-1. 
A fast hash function determines both a and b +simultaneously. Any decent hash function is likely to produce +hashes so that (a,b) is distinct for all pairs. I try the hash +using different values of *salt* until all pairs are distinct. + +The final hash is (a XOR scramble[tab[b]]). *scramble* is a +predetermined mapping of 0..255 into 0..smax-1. *tab* is an +array that we fill in in such a way as to make the hash perfect. + +First we fill in all values of *tab* that are used by more than one +key. We try all possible values for each position until one works. + +This leaves m unmapped keys and m values that something could hash to. +If you treat unmapped keys as lefthand nodes and unused hash values +as righthand nodes, and draw a line connecting each key to each hash +value it could map to, you get a bipartite graph. We attempt to +find a perfect matching in this graph. If we succeed, we have +determined a perfect hash for the whole set of keys. + +*scramble* is used because (a^tab[i]) clusters keys around *a*. +------------------------------------------------------------------------------ +*/ + +#include <string.h> +#include "tools/genperf/standard.h" +#include "libyasm/coretype.h" +#include "libyasm/phash.h" +#include "tools/genperf/perfect.h" + +#define CHECKSTATE 8 + +/* +------------------------------------------------------------------------------ +Find the mapping that will produce a perfect hash +------------------------------------------------------------------------------ +*/ + +/* return the ceiling of the log (base 2) of val */ +ub4 phash_log2(val) +ub4 val; +{ + ub4 i; + for (i=0; ((ub4)1<<i) < val; ++i) + ; + return i; +} + +/* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */ +/* permute(0)=0. This is intended and useful. 
*/ +static ub4 permute( + ub4 x, /* input, a value in some range */ + ub4 nbits) /* input, number of bits in range */ +{ + int i; + int mask = ((ub4)1<<nbits)-1; /* all ones */ + int const2 = 1+nbits/2; + int const3 = 1+nbits/3; + int const4 = 1+nbits/4; + int const5 = 1+nbits/5; + for (i=0; i<20; ++i) + { + x = (x+(x<<const2)) & mask; + x = (x^(x>>const3)); + x = (x+(x<<const4)) & mask; + x = (x^(x>>const5)); + } + return x; +} + +/* initialize scramble[] with distinct random values in 0..smax-1 */ +static void scrambleinit( + ub4 *scramble, /* hash is a^scramble[tab[b]] */ + ub4 smax) /* scramble values should be in 0..smax-1 */ +{ + ub4 i; + + /* fill scramble[] with distinct random integers in 0..smax-1 */ + for (i=0; i<SCRAMBLE_LEN; ++i) + { + scramble[i] = permute(i, phash_log2(smax)); + } +} + +/* + * Check if key1 and key2 are the same. + * We already checked (a,b) are the same. + */ +static void checkdup( + key *key1, + key *key2, + hashform *form) +{ + switch(form->hashtype) + { + case STRING_HT: + if ((key1->len_k == key2->len_k) && + !memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k)) + { + fprintf(stderr, "perfect.c: Duplicates keys! %.*s\n", + (int)key1->len_k, key1->name_k); + exit(EXIT_FAILURE); + } + break; + case INT_HT: + if (key1->hash_k == key2->hash_k) + { + fprintf(stderr, "perfect.c: Duplicate keys! %.8lx\n", key1->hash_k); + exit(EXIT_FAILURE); + } + break; + case AB_HT: + fprintf(stderr, "perfect.c: Duplicate keys! 
%.8lx %.8lx\n", + key1->a_k, key1->b_k); + exit(EXIT_FAILURE); + break; + default: + fprintf(stderr, "perfect.c: Illegal hash type %ld\n", (ub4)form->hashtype); + exit(EXIT_FAILURE); + break; + } +} + + +/* + * put keys in tabb according to key->b_k + * check if the initial hash might work + */ +static int inittab( + bstuff *tabb, /* output, list of keys with b for (a,b) */ + ub4 blen, /* length of tabb */ + key *keys, /* list of keys already hashed */ + hashform *form, /* user directives */ + int complete) /* TRUE means to complete init despite collisions */ +{ + int nocollision = TRUE; + key *mykey; + + memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen)); + + /* Two keys with the same (a,b) guarantees a collision */ + for (mykey=keys; mykey; mykey=mykey->next_k) + { + key *otherkey; + + for (otherkey=tabb[mykey->b_k].list_b; + otherkey; + otherkey=otherkey->nextb_k) + { + if (mykey->a_k == otherkey->a_k) + { + nocollision = FALSE; + checkdup(mykey, otherkey, form); + if (!complete) + return FALSE; + } + } + ++tabb[mykey->b_k].listlen_b; + mykey->nextb_k = tabb[mykey->b_k].list_b; + tabb[mykey->b_k].list_b = mykey; + } + + /* no two keys have the same (a,b) pair */ + return nocollision; +} + + +/* Do the initial hash for normal mode (use lookup and checksum) */ +static void initnorm( + key *keys, /* list of all keys */ + ub4 alen, /* (a,b) has a in 0..alen-1, a power of 2 */ + ub4 blen, /* (a,b) has b in 0..blen-1, a power of 2 */ + ub4 smax, /* maximum range of computable hash values */ + ub4 salt, /* used to initialize the hash function */ + gencode *final) /* output, code for the final hash */ +{ + key *mykey; + if (phash_log2(alen)+phash_log2(blen) > UB4BITS) + { + ub4 initlev = (salt*0x9e3779b9)&0xffffffff; /* the golden ratio; an arbitrary value */ + + for (mykey=keys; mykey; mykey=mykey->next_k) + { + ub4 i, state[CHECKSTATE]; + for (i=0; i<CHECKSTATE; ++i) state[i] = initlev; + phash_checksum( mykey->name_k, mykey->len_k, state); + mykey->a_k = 
state[0]&(alen-1); + mykey->b_k = state[1]&(blen-1); + } + final->used = 4; + sprintf(final->line[0], + " unsigned long i,state[CHECKSTATE],rsl;\n"); + sprintf(final->line[1], + " for (i=0; i<CHECKSTATE; ++i) state[i]=0x%lx;\n",initlev); + sprintf(final->line[2], + " phash_checksum(key, len, state);\n"); + sprintf(final->line[3], + " rsl = ((state[0]&0x%lx)^scramble[tab[state[1]&0x%lx]]);\n", + alen-1, blen-1); + } + else + { + ub4 loga = phash_log2(alen); /* log based 2 of blen */ + ub4 initlev = (salt*0x9e3779b9)&0xffffffff; /* the golden ratio; an arbitrary value */ + + for (mykey=keys; mykey; mykey=mykey->next_k) + { + ub4 hash = phash_lookup(mykey->name_k, mykey->len_k, initlev); + mykey->a_k = (loga > 0) ? hash>>(UB4BITS-loga) : 0; + mykey->b_k = (blen > 1) ? hash&(blen-1) : 0; + } + final->used = 2; + sprintf(final->line[0], + " unsigned long rsl, val = phash_lookup(key, len, 0x%lxUL);\n", initlev); + if (smax <= 1) + { + sprintf(final->line[1], " rsl = 0;\n"); + } + else if (blen < USE_SCRAMBLE) + { + sprintf(final->line[1], " rsl = ((val>>%ld)^tab[val&0x%lx]);\n", + UB4BITS-phash_log2(alen), blen-1); + } + else + { + sprintf(final->line[1], " rsl = ((val>>%ld)^scramble[tab[val&0x%lx]]);\n", + UB4BITS-phash_log2(alen), blen-1); + } + } +} + + + +/* Do initial hash for inline mode */ +static void initinl( + key *keys, /* list of all keys */ + ub4 alen, /* (a,b) has a in 0..alen-1, a power of 2 */ + ub4 blen, /* (a,b) has b in 0..blen-1, a power of 2 */ + ub4 smax, /* range of computable hash values */ + ub4 salt, /* used to initialize the hash function */ + gencode *final) /* generated code for final hash */ +{ + key *mykey; + ub4 amask = alen-1; + ub4 blog = phash_log2(blen); + ub4 initval = salt*0x9e3779b9; /* the golden ratio; an arbitrary value */ + + /* It's more important to have b uniform than a, so b is the low bits */ + for (mykey = keys; mykey != (key *)0; mykey = mykey->next_k) + { + ub4 hash = initval; + ub4 i; + for (i=0; i<mykey->len_k; ++i) + 
{ + hash = ((ub1)mykey->name_k[i] ^ hash) + ((hash<<(UB4BITS-6))+(hash>>6)); + } + mykey->hash_k = hash; + mykey->a_k = (alen > 1) ? (hash & amask) : 0; + mykey->b_k = (blen > 1) ? (hash >> (UB4BITS-blog)) : 0; + } + final->used = 1; + if (smax <= 1) + { + sprintf(final->line[0], " unsigned long rsl = 0;\n"); + } + else if (blen < USE_SCRAMBLE) + { + sprintf(final->line[0], " unsigned long rsl = ((val & 0x%lx) ^ tab[val >> %ld]);\n", + amask, UB4BITS-blog); + } + else + { + sprintf(final->line[0], " unsigned long rsl = ((val & 0x%lx) ^ scramble[tab[val >> %ld]]);\n", + amask, UB4BITS-blog); + } +} + + +/* + * Run a hash function on the key to get a and b + * Returns: + * 0: didn't find distinct (a,b) for all keys + * 1: found distinct (a,b) for all keys, put keys in tabb[] + * 2: found a perfect hash, no need to do any more work + */ +static ub4 initkey( + key *keys, /* list of all keys */ + ub4 nkeys, /* total number of keys */ + bstuff *tabb, /* stuff indexed by b */ + ub4 alen, /* (a,b) has a in 0..alen-1, a power of 2 */ + ub4 blen, /* (a,b) has b in 0..blen-1, a power of 2 */ + ub4 smax, /* range of computable hash values */ + ub4 salt, /* used to initialize the hash function */ + hashform *form, /* user directives */ + gencode *final) /* code for final hash */ +{ + /* Do the initial hash of the keys */ + switch(form->mode) + { + case NORMAL_HM: + initnorm(keys, alen, blen, smax, salt, final); + break; + case INLINE_HM: + initinl(keys, alen, blen, smax, salt, final); + break; +#if 0 + case HEX_HM: + case DECIMAL_HM: + finished = inithex(keys, nkeys, alen, blen, smax, salt, final, form); + if (finished) return 2; + break; +#endif + default: + fprintf(stderr, "fatal error: illegal mode\n"); + exit(1); + } + + if (nkeys <= 1) + { + final->used = 1; + sprintf(final->line[0], " unsigned long rsl = 0;\n"); + return 2; + } + + return inittab(tabb, blen, keys, form, FALSE); +} + +/* Print an error message and exit if there are duplicates */ +static void duplicates( + 
bstuff *tabb, /* array of lists of keys with the same b */ + ub4 blen, /* length of tabb, a power of 2 */ + key *keys, + hashform *form) /* user directives */ +{ + ub4 i; + key *key1; + key *key2; + + (void)inittab(tabb, blen, keys, form, TRUE); + + /* for each b, do nested loops through key list looking for duplicates */ + for (i=0; i<blen; ++i) + for (key1=tabb[i].list_b; key1; key1=key1->nextb_k) + for (key2=key1->nextb_k; key2; key2=key2->nextb_k) + checkdup(key1, key2, form); +} + + +/* Try to apply an augmenting list */ +static int apply( + bstuff *tabb, + hstuff *tabh, + qstuff *tabq, + ub4 blen, + ub4 *scramble, + ub4 tail, + int rollback) /* FALSE applies augmenting path, TRUE rolls back */ +{ + ub4 hash; + key *mykey; + bstuff *pb; + ub4 child; + ub4 parent; + ub4 stabb; /* scramble[tab[b]] */ + + /* walk from child to parent */ + for (child=tail-1; child; child=parent) + { + parent = tabq[child].parent_q; /* find child's parent */ + pb = tabq[parent].b_q; /* find parent's list of siblings */ + + /* erase old hash values */ + stabb = scramble[pb->val_b]; + for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) + { + hash = mykey->a_k^stabb; + if (mykey == tabh[hash].key_h) + { /* erase hash for all of child's siblings */ + tabh[hash].key_h = (key *)0; + } + } + + /* change pb->val_b, which will change the hashes of all parent siblings */ + pb->val_b = (rollback ? 
tabq[child].oldval_q : tabq[child].newval_q); + + /* set new hash values */ + stabb = scramble[pb->val_b]; + for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) + { + hash = mykey->a_k^stabb; + if (rollback) + { + if (parent == 0) continue; /* root never had a hash */ + } + else if (tabh[hash].key_h) + { + /* very rare: roll back any changes */ + apply(tabb, tabh, tabq, blen, scramble, tail, TRUE); + return FALSE; /* failure, collision */ + } + tabh[hash].key_h = mykey; + } + } + return TRUE; +} + + +/* +------------------------------------------------------------------------------- +augment(): Add item to the mapping. + +Construct a spanning tree of *b*s with *item* as root, where each +parent can have all its hashes changed (by some new val_b) with +at most one collision, and each child is the b of that collision. + +I got this from Tarjan's "Data Structures and Network Algorithms". The +path from *item* to a *b* that can be remapped with no collision is +an "augmenting path". Change values of tab[b] along the path so that +the unmapped key gets mapped and the unused hash value gets used. + +Assuming 1 key per b, if m out of n hash values are still unused, +you should expect the transitive closure to cover n/m nodes before +an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect +this approach to take about nlogn time to map all single-key b's. 
+------------------------------------------------------------------------------- +*/ +static int augment( + bstuff *tabb, /* stuff indexed by b */ + hstuff *tabh, /* which key is associated with which hash, indexed by hash */ + qstuff *tabq, /* queue of *b* values, this is the spanning tree */ + ub4 blen, /* length of tabb */ + ub4 *scramble, /* final hash is a^scramble[tab[b]] */ + ub4 smax, /* highest value in scramble */ + bstuff *item, /* &tabb[b] for the b to be mapped */ + ub4 nkeys, /* final hash must be in 0..nkeys-1 (minimal perfect hash only) */ + ub4 highwater, /* a value higher than any now in tabb[].water_b */ + hashform *form) /* user directives; perfect and speed are consulted here */ +{ + ub4 q; /* current position walking through the queue */ + ub4 tail; /* tail of the queue. 0 is the head of the queue. */ + ub4 limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1); /* number of candidate val_b values to try */ + ub4 highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax); /* hashes must stay below this */ + int trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP); /* explore beyond the root? */ + + /* initialize the root of the spanning tree */ + tabq[0].b_q = item; + tail = 1; + + /* construct the spanning tree by walking the queue, add children to tail */ + for (q=0; q<tail; ++q) + { + bstuff *myb = tabq[q].b_q; /* the b for this node */ + ub4 i; /* possible value for myb->val_b */ + + if (!trans && (q == 1)) + break; /* don't do transitive closure */ + + for (i=0; i<limit; ++i) + { + bstuff *childb = (bstuff *)0; /* the b that this i maps to */ + key *mykey; /* for walking through myb's keys */ + + for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k) + { + key *childkey; + ub4 hash = mykey->a_k^scramble[i]; + + if (hash >= highhash) break; /* out of bounds */ + childkey = tabh[hash].key_h; + + if (childkey) + { + bstuff *hitb = &tabb[childkey->b_k]; + + if (childb) + { + if (childb != hitb) break; /* hit at most one child b */ + } + else + { + childb = hitb; /* remember this as childb */ + if (childb->water_b == highwater) break; /* already explored */ + } + } + } + if (mykey) 
continue; /* myb with i has multiple collisions */ + + /* add childb to the queue of reachable things */ + if (childb) childb->water_b = highwater; + tabq[tail].b_q = childb; + tabq[tail].newval_q = (ub2)i; /* how to make parent (myb) use this hash */ + tabq[tail].oldval_q = myb->val_b; /* need this for rollback */ + tabq[tail].parent_q = q; + ++tail; + + if (!childb) + { /* found an *i* with no collisions? */ + /* try to apply the augmenting path */ + if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE)) + return TRUE; /* success, item was added to the perfect hash */ + + --tail; /* don't know how to handle such a child! */ + } + } + } + return FALSE; +} + + +/* + * Find a mapping that makes this a perfect hash. + * Clears tabh and tabq, then maps every b through augment(), largest key + * groups first. Returns TRUE on success, FALSE (after a message on stderr) + * as soon as one group cannot be mapped. + */ +static int perfect( + bstuff *tabb, + hstuff *tabh, + qstuff *tabq, + ub4 blen, + ub4 smax, + ub4 *scramble, + ub4 nkeys, + hashform *form) +{ + ub4 maxkeys; /* maximum number of keys for any b */ + ub4 i, j; + + /* clear any state from previous attempts */ + memset((void *)tabh, 0, + (size_t)(sizeof(hstuff)* + ((form->perfect == MINIMAL_HP) ? nkeys : smax))); + memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1))); + + for (maxkeys=0,i=0; i<blen; ++i) + if (tabb[i].listlen_b > maxkeys) + maxkeys = tabb[i].listlen_b; + + /* In descending order by number of keys, map all *b*s */ + for (j=maxkeys; j>0; --j) + for (i=0; i<blen; ++i) + if (tabb[i].listlen_b == j) + if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys, + i+1, form)) + { + /* j and blen are ub4 (unsigned long), so print them with %lu */ + fprintf(stderr, "fail to map group of size %lu for tab size %lu\n", j, blen); + return FALSE; + } + + /* Success! We found a perfect hash of all keys into 0..nkeys-1. */ + return TRUE; +} + + +/* + * Simple case: user gave (a,b). No more mixing, no guessing alen or blen. + * This assumes a,b reside in (key->a_k, key->b_k), and form->hashtype == AB_HT. 
+ */ +static void hash_ab( + bstuff **tabb, /* output, tab[] of the perfect hash, length *blen */ + ub4 *alen, /* output, 0..alen-1 is range for a of (a,b) */ + ub4 *blen, /* output, 0..blen-1 is range for b of (a,b) */ + ub4 *salt, /* output, initializes initial hash */ + gencode *final, /* code for final hash */ + ub4 *scramble, /* input, hash = a^scramble[tab[b]] */ + ub4 *smax, /* input, scramble[i] in 0..smax-1 */ + key *keys, /* input, keys to hash */ + ub4 nkeys, /* input, number of keys being hashed */ + hashform *form) /* user directives */ +{ + hstuff *tabh; + qstuff *tabq; + key *mykey; + ub4 i; + int used_tab; + + /* initially make smax the first power of two bigger than nkeys */ + *smax = ((ub4)1<<phash_log2(nkeys)); + scrambleinit(scramble, *smax); + + /* set *alen and *blen based on max A and B from user */ + *alen = 1; + *blen = 1; + for (mykey = keys; mykey != (key *)0; mykey = mykey->next_k) + { + while (*alen <= mykey->a_k) *alen *= 2; + while (*blen <= mykey->b_k) *blen *= 2; + } + if (*alen > 2**smax) + { + fprintf(stderr, + "perfect.c: Can't deal with (A,B) having A bigger than twice \n"); + fprintf(stderr, + " the smallest power of two greater or equal to any legal hash.\n"); + exit(EXIT_FAILURE); + } + + /* allocate working memory */ + *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen))); + tabq = (qstuff *)yasm_xmalloc(sizeof(qstuff)*(*blen+1)); + tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+ nkeys : *smax)); + + /* check that (a,b) are distinct and put them in tabb indexed by b */ + (void)inittab(*tabb, *blen, keys, form, FALSE); + + /* try with smax */ + if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form)) + { + if (form->perfect == MINIMAL_HP) + { + fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n"); + exit(EXIT_FAILURE); + } + else + { + /* try with 2*smax */ + free((void *)tabh); + *smax = *smax * 2; + scrambleinit(scramble, *smax); + tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? + nkeys : *smax)); + if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form)) + { + fprintf(stderr, "fatal error: Cannot find perfect hash for user (A,B) pairs\n"); + exit(EXIT_FAILURE); + } + } + } + + /* check if tab[] was really needed */ + for (i=0; i<*blen; ++i) + { + if ((*tabb)[i].val_b != 0) break; /* assumes permute(0) == 0 */ + } + used_tab = (i < *blen); + + /* write the code for the perfect hash */ + *salt = 1; + final->used = 1; + if (!used_tab) + { + sprintf(final->line[0], " unsigned long rsl = a;\n"); + } + else if (*blen < USE_SCRAMBLE) + { + sprintf(final->line[0], " unsigned long rsl = (a ^ tab[b]);\n"); + } + else + { + sprintf(final->line[0], " unsigned long rsl = (a ^ scramble[tab[b]]);\n"); + } + + free((void *)tabq); + free((void *)tabh); +} + + +/* guess initial values for alen and blen */ +static void initalen( + ub4 *alen, /* output, initial alen */ + ub4 *blen, /* output, initial blen */ + ub4 *smax,/* input, power of two greater or equal to max hash value */ + ub4 nkeys, /* number of keys being hashed */ + hashform *form) /* user directives */ +{ + /* + * Find initial *alen, *blen + * Initial alen and blen values were found empirically. Some factors: + * + * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1. 
+ * + * alen and blen must be powers of 2 because the values in 0..alen-1 and + * 0..blen-1 are produced by applying a bitmask to the initial hash function. + * + * alen must be less than smax, in fact less than nkeys, because otherwise + * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for + * all the *a*s associated with a given *b*, so there would be no legal + * value to assign to tab[b]. This only matters when we're doing a minimal + * perfect hash. + * + * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8) + * and alen*blen = smax*smax/32. + * + * Values of blen less than smax/4 never work, and smax/2 always works. + * + * We want blen as small as possible because it is the number of bytes in + * the huge array we must create for the perfect hash. + * + * When nkey <= smax*(5/8), blen=smax/4 works much more often with + * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4 + * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it + * has something to do with 5/8 = 1/8 * 5. For example examine 80000, + * 85000, and 90000 keys with different values of alen. This only matters + * if we're doing a minimal perfect hash. + * + * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer. + * Bigger than that it must produce two integers, which increases the + * cost of the hash per character hashed. + */ + if (form->perfect == NORMAL_HP) + { + if ((form->speed == FAST_HS) && (nkeys > *smax*0.8)) + { + *smax = *smax * 2; + } + + *alen = ((form->hashtype==INT_HT) && *smax>131072) ? + ((ub4)1<<(UB4BITS-phash_log2(*blen))) : /* distinct keys => distinct (A,B) */ + *smax; /* no reason to restrict alen to smax/2 */ + if ((form->hashtype == INT_HT) && *smax < 32) + *blen = *smax; /* go for function speed not space */ + else if (*smax/4 <= (1<<14)) + *blen = ((nkeys <= *smax*0.56) ? *smax/32 : + (nkeys <= *smax*0.74) ? *smax/16 : *smax/8); + else + *blen = ((nkeys <= *smax*0.6) ? 
*smax/16 : + (nkeys <= *smax*0.8) ? *smax/8 : *smax/4); + + if ((form->speed == FAST_HS) && (*blen < *smax/8)) + *blen = *smax/8; + + if (*alen < 1) *alen = 1; + if (*blen < 1) *blen = 1; + } + else + { + switch(phash_log2(*smax)) + { + case 0: + *alen = 1; + *blen = 1; + case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: + *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2; + *blen = *smax/2; + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + if (form->speed == FAST_HS) + { + *alen = *smax/2; + *blen = *smax/4; + } + else if (*smax/4 < USE_SCRAMBLE) + { + *alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4); + *blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4); + } + else + { + *alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 : + (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2); + *blen = *smax/4; /* always give the small size a shot */ + } + break; + case 18: + if (form->speed == FAST_HS) + { + *alen = *smax/2; + *blen = *smax/2; + } + else + { + *alen = *smax/8; /* never require the multiword hash */ + *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2; + } + break; + case 19: + case 20: + *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2; + *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2; + break; + default: + *alen = *smax/2; /* just find a hash as quick as possible */ + *blen = *smax/2; /* we'll be thrashing virtual memory at this size */ + break; + } + } +} + +/* +** Try to find a perfect hash function. +** Return the successful initializer for the initial hash. +** Return 0 if no perfect hash could be found. 
+*/ +void findhash( + bstuff **tabb, /* output, tab[] of the perfect hash, length *blen */ + hstuff **tabh, /* output, table of keys indexed by hash value */ + ub4 *alen, /* output, 0..alen-1 is range for a of (a,b) */ + ub4 *blen, /* output, 0..blen-1 is range for b of (a,b) */ + ub4 *salt, /* output, initializes initial hash */ + gencode *final, /* code for final hash */ + ub4 *scramble, /* input, hash = a^scramble[tab[b]] */ + ub4 *smax, /* input, scramble[i] in 0..smax-1 */ + key *keys, /* input, keys to hash */ + ub4 nkeys, /* input, number of keys being hashed */ + hashform *form) /* user directives */ +{ + ub4 bad_initkey; /* how many times did initkey fail? */ + ub4 bad_perfect; /* how many times did perfect fail? */ + ub4 trysalt; /* trial initializer for initial hash */ + ub4 maxalen; + qstuff *tabq; /* table of stuff indexed by queue value, used by augment */ + + /* The case of (A,B) supplied by the user is a special case */ + if (form->hashtype == AB_HT) + { + hash_ab(tabb, alen, blen, salt, final, + scramble, smax, keys, nkeys, form); + return; + } + + /* guess initial values for smax, alen and blen */ + *smax = ((ub4)1<<phash_log2(nkeys)); + initalen(alen, blen, smax, nkeys, form); + + scrambleinit(scramble, *smax); + + maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax; + + /* allocate working memory */ + *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen))); + tabq = (qstuff *)yasm_xmalloc(sizeof(qstuff)*(*blen+1)); + *tabh = (hstuff *)yasm_xmalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+ nkeys : *smax)); + + /* Actually find the perfect hash */ + *salt = 0; + bad_initkey = 0; + bad_perfect = 0; + for (trysalt=1; ; ++trysalt) + { + ub4 rslinit; + /* Try to find distinct (A,B) for all keys */ + + rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt, + form, final); + + if (rslinit == 2) + { /* initkey actually found a perfect hash, not just distinct (a,b) */ + *salt = 1; + *blen = 0; + break; + } + else if (rslinit == 0) + { + /* didn't find distinct (a,b) */ + if (++bad_initkey >= RETRY_INITKEY) + { + /* Try to put more bits in (A,B) to make distinct (A,B) more likely */ + if (*alen < maxalen) + { + *alen *= 2; + } + else if (*blen < *smax) + { + *blen *= 2; + free(tabq); + free(*tabb); + *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen))); + tabq = (qstuff *)yasm_xmalloc((size_t)(sizeof(qstuff)*(*blen+1))); + } + else + { + duplicates(*tabb, *blen, keys, form); /* check for duplicates */ + fprintf(stderr, "fatal error: Cannot perfect hash: cannot find distinct (A,B)\n"); + exit(EXIT_FAILURE); + } + bad_initkey = 0; + bad_perfect = 0; + } + continue; /* two keys have same (a,b) pair */ + } + + /* Given distinct (A,B) for all keys, build a perfect hash */ + if (!perfect(*tabb, *tabh, tabq, *blen, *smax, scramble, nkeys, form)) + { + if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) || + (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX)) + { + if (*blen < *smax) + { + *blen *= 2; + free(*tabb); + free(tabq); + *tabb = (bstuff *)yasm_xmalloc((size_t)(sizeof(bstuff)*(*blen))); + tabq = (qstuff *)yasm_xmalloc((size_t)(sizeof(qstuff)*(*blen+1))); + --trysalt; /* we know this salt got distinct (A,B) */ + } + else + { + fprintf(stderr, "fatal error: Cannot perfect hash: cannot build tab[]\n"); + exit(EXIT_FAILURE); + } + bad_perfect = 0; + } + continue; + } + + *salt = trysalt; + break; + } + + /* free working memory */ + free((void *)tabq); +} + +#if 0 +/* 
+------------------------------------------------------------------------------ +Input/output type routines +------------------------------------------------------------------------------ +*/ + +/* get the list of keys */ +static void getkeys(keys, nkeys, textroot, keyroot, form) +key **keys; /* list of all keys */ +ub4 *nkeys; /* number of keys */ +reroot *textroot; /* get space to store key text */ +reroot *keyroot; /* get space for keys */ +hashform *form; /* user directives */ +{ + key *mykey; + char *mytext; + mytext = (char *)renew(textroot); + *keys = 0; + *nkeys = 0; + while (fgets(mytext, MAXKEYLEN, stdin)) + { + mykey = (key *)renew(keyroot); + if (form->mode == AB_HM) + { + sscanf(mytext, "%lx %lx ", &mykey->a_k, &mykey->b_k); + } + else if (form->mode == ABDEC_HM) + { + sscanf(mytext, "%ld %ld ", &mykey->a_k, &mykey->b_k); + } + else if (form->mode == HEX_HM) + { + sscanf(mytext, "%lx ", &mykey->hash_k); + } + else if (form->mode == DECIMAL_HM) + { + sscanf(mytext, "%ld ", &mykey->hash_k); + } + else + { + mykey->name_k = (ub1 *)mytext; + mytext = (char *)renew(textroot); + mykey->len_k = (ub4)(strlen((char *)mykey->name_k)-1); + } + mykey->next_k = *keys; + *keys = mykey; + ++*nkeys; + } + redel(textroot, mytext); +} + +/* make the .c file */ +static void make_c(tab, smax, blen, scramble, final, form) +bstuff *tab; /* table indexed by b */ +ub4 smax; /* range of scramble[] */ +ub4 blen; /* b in 0..blen-1, power of 2 */ +ub4 *scramble; /* used in final hash */ +gencode *final; /* code for the final hash */ +hashform *form; /* user directives */ +{ + ub4 i; + FILE *f; + f = fopen("phash.c", "w"); + fprintf(f, "/* table for the mapping for the perfect hash */\n"); + fprintf(f, "#include \"lookupa.h\"\n"); + fprintf(f, "\n"); + if (blen >= USE_SCRAMBLE) + { + fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n"); + if (smax > UB2MAXVAL+1) + { + fprintf(f, "unsigned long scramble[] = {\n"); + for (i=0; i<=UB1MAXVAL; i+=4) + fprintf(f, 
"0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n", + scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]); + } + else + { + fprintf(f, "unsigned short scramble[] = {\n"); + for (i=0; i<=UB1MAXVAL; i+=8) + fprintf(f, +"0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n", + scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3], + scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]); + } + fprintf(f, "};\n"); + fprintf(f, "\n"); + } + if (blen > 0) + { + fprintf(f, "/* small adjustments to _a_ to make values distinct */\n"); + + if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE) + fprintf(f, "unsigned char tab[] = {\n"); + else + fprintf(f, "unsigned short tab[] = {\n"); + + if (blen < 16) + { + for (i=0; i<blen; ++i) fprintf(f, "%3d,", scramble[tab[i].val_b]); + } + else if (blen <= 1024) + { + for (i=0; i<blen; i+=16) + fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", + scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], + scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], + scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], + scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], + scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], + scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], + scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], + scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); + } + else if (blen < USE_SCRAMBLE) + { + for (i=0; i<blen; i+=8) + fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", + scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], + scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], + scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], + scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); + } + else + { + for (i=0; i<blen; i+=16) + fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n", + tab[i+0].val_b, tab[i+1].val_b, + tab[i+2].val_b, tab[i+3].val_b, + tab[i+4].val_b, tab[i+5].val_b, + tab[i+6].val_b, tab[i+7].val_b, + tab[i+8].val_b, 
tab[i+9].val_b, + tab[i+10].val_b, tab[i+11].val_b, + tab[i+12].val_b, tab[i+13].val_b, + tab[i+14].val_b, tab[i+15].val_b); + } + fprintf(f, "};\n"); + fprintf(f, "\n"); + } + fprintf(f, "/* The hash function */\n"); + switch(form->mode) + { + case NORMAL_HM: + fprintf(f, "ub4 phash(key, len)\n"); + fprintf(f, "char *key;\n"); + fprintf(f, "int len;\n"); + break; + case INLINE_HM: + case HEX_HM: + case DECIMAL_HM: + fprintf(f, "ub4 phash(val)\n"); + fprintf(f, "ub4 val;\n"); + break; + case AB_HM: + case ABDEC_HM: + fprintf(f, "ub4 phash(a,b)\n"); + fprintf(f, "ub4 a;\n"); + fprintf(f, "ub4 b;\n"); + break; + } + fprintf(f, "{\n"); + for (i=0; i<final->used; ++i) + fprintf(f, final->line[i]); + fprintf(f, " return rsl;\n"); + fprintf(f, "}\n"); + fprintf(f, "\n"); + fclose(f); +} + +/* +------------------------------------------------------------------------------ +Read in the keys, find the hash, and write the .c and .h files +------------------------------------------------------------------------------ +*/ +static void driver(form) +hashform *form; /* user directives */ +{ + ub4 nkeys; /* number of keys */ + key *keys; /* head of list of keys */ + bstuff *tab; /* table indexed by b */ + ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ + ub4 alen; /* a in 0..alen-1, a power of 2 */ + ub4 blen; /* b in 0..blen-1, a power of 2 */ + ub4 salt; /* a parameter to the hash function */ + reroot *textroot; /* MAXKEYLEN-character text lines */ + reroot *keyroot; /* source of keys */ + gencode final; /* code for final hash */ + ub4 i; + ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ + char buf[10][80]; /* buffer for generated code */ + char *buf2[10]; /* also for generated code */ + + /* set up memory sources */ + textroot = remkroot((size_t)MAXKEYLEN); + keyroot = remkroot(sizeof(key)); + + /* set up code for final hash */ + final.line = buf2; + final.used = 0; + final.len = 10; + for (i=0; i<10; ++i) final.line[i] = buf[i]; + + /* read in the 
list of keywords */ + getkeys(&keys, &nkeys, textroot, keyroot, form); + + /* find the hash */ + findhash(&tab, &alen, &blen, &salt, &final, + scramble, &smax, keys, nkeys, form); + + /* generate the phash.c file */ + make_c(tab, smax, blen, scramble, &final, form); + + /* clean up memory sources */ + refree(textroot); + refree(keyroot); + free((void *)tab); +} + + +/* Interpret arguments and call the driver */ +/* See usage_error for the expected arguments */ +int main(argc, argv) +int argc; +char **argv; +{ + int mode_given = FALSE; + int minimal_given = FALSE; + int speed_given = FALSE; + hashform form; + char *c; + + /* default behavior */ + form.mode = NORMAL_HM; + form.hashtype = STRING_HT; + form.perfect = MINIMAL_HP; + form.speed = SLOW_HS; + + /* Generate the [minimal] perfect hash */ + driver(&form); + + return EXIT_SUCCESS; +} +#endif diff --git a/tools/genperf/perfect.h b/tools/genperf/perfect.h new file mode 100644 index 0000000..b78d943 --- /dev/null +++ b/tools/genperf/perfect.h @@ -0,0 +1,132 @@ +/* +------------------------------------------------------------------------------ +perfect.h: code to generate code for a hash for perfect hashing. +(c) Bob Jenkins, September 1996 +You may use this code in any way you wish, and it is free. No warranty. +I hereby place this in the public domain. 
+Source is http://burtleburtle.net/bob/c/perfect.h +------------------------------------------------------------------------------ +*/ + +#ifndef STANDARD +#include "standard.h" +#endif + +#ifndef PERFECT +#define PERFECT + +#define MAXKEYLEN 30 /* maximum length of a key */ +#define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */ +#define SCRAMBLE_LEN ((ub4)1<<16) /* length of *scramble* */ +#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */ +#define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */ +#define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */ + +/* the generated code for the final hash, assumes initial hash is done */ +struct gencode +{ + char **line; /* array of text lines, 80 bytes apiece */ + /* + * The code placed here must declare "ub4 rsl" + * and assign it the value of the perfect hash using the function inputs. + * Later code will be tacked on which returns rsl or manipulates it according + * to the user directives. + * + * This code is at the top of the routine; it may and must declare any + * local variables it needs. + * + * Each way of filling in **line should be given a comment that is a unique + * tag. A testcase named with that tag should also be found which tests + * the generated code. + */ + ub4 len; /* number of lines available for final hash */ + ub4 used; /* number of lines used by final hash */ + + ub4 lowbit; /* for HEX, lowest interesting bit */ + ub4 highbit; /* for HEX, highest interesting bit */ + ub4 diffbits; /* bits which differ for some key */ + ub4 i,j,k,l,m,n,o; /* state machine used in hexn() */ +}; +typedef struct gencode gencode; + +/* user directives: perfect hash? minimal perfect hash? input is an int? 
*/ +struct hashform +{ + enum { + NORMAL_HM, /* key is a string */ + INLINE_HM, /* user will do initial hash, we must choose salt for them */ + HEX_HM, /* key to be hashed is a hexidecimal 4-byte integer */ + DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */ + AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */ + ABDEC_HM /* like AB_HM, but in decimal */ + } mode; + enum { + STRING_HT, /* key is a string */ + INT_HT, /* key is an integer */ + AB_HT /* dunno what key is, but input is distinct (A,B) pair */ + } hashtype; + enum { + NORMAL_HP, /* just find a perfect hash */ + MINIMAL_HP /* find a minimal perfect hash */ + } perfect; + enum { + FAST_HS, /* fast mode */ + SLOW_HS /* slow mode */ + } speed; +}; +typedef struct hashform hashform; + +/* representation of a key */ +struct key +{ + char *name_k; /* the actual key */ + ub4 len_k; /* the length of the actual key */ + ub4 hash_k; /* the initial hash value for this key */ + struct key *next_k; /* next key */ +/* beyond this point is mapping-dependent */ + ub4 a_k; /* a, of the key maps to (a,b) */ + ub4 b_k; /* b, of the key maps to (a,b) */ + struct key *nextb_k; /* next key with this b */ +}; +typedef struct key key; + +/* things indexed by b of original (a,b) pair */ +struct bstuff +{ + ub2 val_b; /* hash=a^tabb[b].val_b */ + key *list_b; /* tabb[i].list_b is list of keys with b==i */ + ub4 listlen_b; /* length of list_b */ + ub4 water_b; /* high watermark of who has visited this map node */ +}; +typedef struct bstuff bstuff; + +/* things indexed by final hash value */ +struct hstuff +{ + key *key_h; /* tabh[i].key_h is the key with a hash of i */ +}; +typedef struct hstuff hstuff; + +/* things indexed by queue position */ +struct qstuff +{ + bstuff *b_q; /* b that currently occupies this hash */ + ub4 parent_q; /* queue position of parent that could use this hash */ + ub2 newval_q; /* what to change parent tab[b] to to use this hash */ + ub2 oldval_q; /* original value of 
tab[b] */ +}; +typedef struct qstuff qstuff; + +/* return ceiling(log based 2 of x) */ +ub4 phash_log2(ub4 x); + +/* Given the keys, scramble[], and hash mode, find the perfect hash */ +void findhash(bstuff **tabb, hstuff **tabh, ub4 *alen, ub4 *blen, ub4 *salt, + gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, + hashform *form); + +/* private, but in a different file because it's excessively verbose */ +int inithex(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub4 salt, + gencode *final, hashform *form); + +#endif /* PERFECT */ diff --git a/tools/genperf/standard.h b/tools/genperf/standard.h new file mode 100644 index 0000000..596b893 --- /dev/null +++ b/tools/genperf/standard.h @@ -0,0 +1,35 @@ +/* +------------------------------------------------------------------------------ +Standard definitions and types, Bob Jenkins +------------------------------------------------------------------------------ +*/ +#ifndef STANDARD +#define STANDARD + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +typedef unsigned long int ub4; /* unsigned, at least 4 bytes (may be wider, e.g. on LP64) */ +#define UB4BITS 32 +typedef unsigned short int ub2; +#define UB2MAXVAL 0xffff +typedef unsigned char ub1; +#define UB1MAXVAL 0xff +typedef int word; /* fastest type available */ + +#define bis(target,mask) ((target) |= (mask)) +#define bic(target,mask) ((target) &= ~(mask)) +#define bit(target,mask) ((target) & (mask)) +#ifndef align +/* round a up to a multiple of sizeof(void *); argument parenthesized so expressions expand safely */ +# define align(a) (((ub4)(a)+(sizeof(void *)-1))&(~(sizeof(void *)-1))) +#endif /* align */ + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#endif /* STANDARD */ diff --git a/tools/python-yasm/Makefile.inc b/tools/python-yasm/Makefile.inc new file mode 100644 index 0000000..fce135b --- /dev/null +++ b/tools/python-yasm/Makefile.inc @@ -0,0 +1,79 @@ +PYBINDING_DEPS = tools/python-yasm/bytecode.pxi +PYBINDING_DEPS += tools/python-yasm/errwarn.pxi +PYBINDING_DEPS += tools/python-yasm/expr.pxi +PYBINDING_DEPS += 
tools/python-yasm/floatnum.pxi +PYBINDING_DEPS += tools/python-yasm/intnum.pxi +PYBINDING_DEPS += tools/python-yasm/symrec.pxi +PYBINDING_DEPS += tools/python-yasm/value.pxi + +EXTRA_DIST += tools/python-yasm/pyxelator/cparse.py +EXTRA_DIST += tools/python-yasm/pyxelator/genpyx.py +EXTRA_DIST += tools/python-yasm/pyxelator/ir.py +EXTRA_DIST += tools/python-yasm/pyxelator/lexer.py +EXTRA_DIST += tools/python-yasm/pyxelator/node.py +EXTRA_DIST += tools/python-yasm/pyxelator/parse_core.py +EXTRA_DIST += tools/python-yasm/pyxelator/work_unit.py +EXTRA_DIST += tools/python-yasm/pyxelator/wrap_yasm.py +EXTRA_DIST += tools/python-yasm/setup.py +EXTRA_DIST += tools/python-yasm/yasm.pyx +EXTRA_DIST += $(PYBINDING_DEPS) + +if HAVE_PYTHON_BINDINGS + +# Use Pyxelator to generate Pyrex function headers. +_yasm.pxi: ${HEADERS} + @rm -rf .tmp + @mkdir .tmp + $(PYTHON) $(srcdir)/tools/python-yasm/pyxelator/wrap_yasm.py \ + "YASM_DIR=${srcdir}" "CPP=${CPP}" "CPPFLAGS=${CPPFLAGS}" + @rm -rf .tmp + +CLEANFILES += _yasm.pxi + +# Need to build a local copy of the main Pyrex input file to include _yasm.pxi +# from the build directory. Also need to fixup the other .pxi include paths. +yasm.pyx: $(srcdir)/tools/python-yasm/yasm.pyx + sed -e 's,^include "\([^_]\),include "${srcdir}/tools/python-yasm/\1,' \ + $(srcdir)/tools/python-yasm/yasm.pyx > $@ + +CLEANFILES += yasm.pyx + +# Actually run Cython +yasm_python.c: yasm.pyx _yasm.pxi $(PYBINDING_DEPS) + $(PYTHON) -c "from Cython.Compiler.Main import main; main(command_line=1)" \ + -o $@ yasm.pyx + +CLEANFILES += yasm_python.c + +# Now the Python build magic... 
+python-setup.txt: Makefile + echo "includes=${DEFS} ${DEFAULT_INCLUDES} ${INCLUDES} ${AM_CPPFLAGS} ${CPPFLAGS}" > python-setup.txt + echo "sources=${libyasm_a_SOURCES} ${nodist_libyasm_a_SOURCES}" >> python-setup.txt + echo "srcdir=${srcdir}" >> python-setup.txt + echo "gcc=${GCC}" >> python-setup.txt + +CLEANFILES += python-setup.txt + +.python-build: python-setup.txt yasm_python.c ${libyasm_a_SOURCES} ${nodist_libyasm_a_SOURCES} + $(PYTHON) `test -f tools/python-yasm/setup.py || echo '$(srcdir)/'`tools/python-yasm/setup.py build + touch .python-build +python-build: .python-build + +CLEANFILES += .python-build + +python-install: .python-build + $(PYTHON) `test -f tools/python-yasm/setup.py || echo '$(srcdir)/'`tools/python-yasm/setup.py install "--install-lib=$(DESTDIR)$(pythondir)" + +python-uninstall: + rm -f `$(PYTHON) -c "import sys;sys.path.insert(0, '${DESTDIR}${pythondir}'); import yasm; print yasm.__file__"` + +else + +python-build: +python-install: +python-uninstall: + +endif + +EXTRA_DIST += tools/python-yasm/tests/Makefile.inc +include tools/python-yasm/tests/Makefile.inc diff --git a/tools/python-yasm/bytecode.pxi b/tools/python-yasm/bytecode.pxi new file mode 100644 index 0000000..34aeaa5 --- /dev/null +++ b/tools/python-yasm/bytecode.pxi @@ -0,0 +1,107 @@ +# Python bindings for Yasm: Pyrex input file for bytecode.h +# +# Copyright (C) 2006 Michael Urman, Peter Johnson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
cdef class Bytecode:
    # Python wrapper around a libyasm yasm_bytecode pointer.
    #
    # The constructor only accepts a PyCObject carrying the pointer (as
    # produced by __pass_voidp); anything else raises NotImplementedError.
    cdef yasm_bytecode *bc

    cdef object __weakref__     # make weak-referenceable

    def __cinit__(self, bc):
        self.bc = NULL
        if PyCObject_Check(bc):
            self.bc = <yasm_bytecode *>__get_voidp(bc, Bytecode)
        else:
            raise NotImplementedError

    def __dealloc__(self):
        # __dealloc__ also runs for an object whose __cinit__ raised; in that
        # case self.bc is still NULL and must not be dereferenced.
        if self.bc == NULL:
            return
        # Only free if we're not part of a section; if we're part of a section
        # the section takes care of freeing the bytecodes.
        if self.bc.section == NULL:
            yasm_bc_destroy(self.bc)

    property len:
        def __get__(self): return self.bc.len
        def __set__(self, value): self.bc.len = value
    property mult_int:
        def __get__(self): return self.bc.mult_int
        def __set__(self, value): self.bc.mult_int = value
    property line:
        def __get__(self): return self.bc.line
        def __set__(self, value): self.bc.line = value
    property offset:
        def __get__(self): return self.bc.offset
        def __set__(self, value): self.bc.offset = value
    property bc_index:
        def __get__(self): return self.bc.bc_index
        def __set__(self, value): self.bc.bc_index = value
    property symbols:
        # Someday extend this to do something modifiable, e.g. return a
        # list-like object.
        def __get__(self):
            # symrecs is walked as a NULL-terminated array of symrec pointers.
            cdef yasm_symrec *sym
            cdef int i
            if self.bc.symrecs == NULL:
                return []
            s = []
            i = 0
            sym = self.bc.symrecs[i]
            while sym != NULL:
                s.append(__make_symbol(sym))
                i = i+1
                sym = self.bc.symrecs[i]
            return s

#
# Keep Bytecode reference paired with bc using weak references.
# This is broken in Pyrex 0.9.4.1; Pyrex 0.9.5 has a working version.
#
# The map is keyed by the integer value of the bytecode pointer.  A freshly
# created PyCObject hashes and compares by object identity, so using one as
# the key would never find a previously stored wrapper.
#

from weakref import WeakValueDictionary as __weakvaldict
__bytecode_map = __weakvaldict()
#__bytecode_map = {}

cdef object __make_bytecode(yasm_bytecode *bc):
    # Return the Bytecode wrapper for bc, creating and caching it on first use.
    __error_check()
    key = <size_t>bc
    data = __bytecode_map.get(key, None)
    if data is not None:
        return data
    bcobj = Bytecode(__pass_voidp(bc, Bytecode))
    __bytecode_map[key] = bcobj
    return bcobj

# Org bytecode
def __org__new__(cls, start, value=0, line=0):
    # Create an ORG bytecode and register its wrapper in the shared map.
    cdef yasm_bytecode *bc
    bc = yasm_bc_create_org(start, line, value)
    obj = Bytecode.__new__(cls, __pass_voidp(bc, Bytecode))
    __bytecode_map[<size_t>bc] = obj
    return obj
__org__new__ = staticmethod(__org__new__)
class Org(Bytecode):
    __new__ = __org__new__
class YasmError(Exception): pass

cdef int __error_check() except 1:
    # Turn a pending libyasm error into a Python exception.
    #
    # Returns 0 when no error is pending.  Otherwise fetches (and thereby
    # clears) the error, sets the Python error indicator and returns 1, which
    # the "except 1" clause propagates to the caller as an exception.
    cdef yasm_error_class errclass
    cdef unsigned long xrefline
    cdef char *errstr, *xrefstr

    # short path for the common case
    if not <int>yasm_error_occurred():
        return 0

    # look up our preferred python error, fall back to YasmError
    # Order matters here. Go from most to least specific within a class
    if yasm_error_matches(YASM_ERROR_ZERO_DIVISION):
        exception = ZeroDivisionError
    # Enable these once there are tests that need them.
    #elif yasm_error_matches(YASM_ERROR_OVERFLOW):
    #    exception = OverflowError
    #elif yasm_error_matches(YASM_ERROR_FLOATING_POINT):
    #    exception = FloatingPointError
    #elif yasm_error_matches(YASM_ERROR_ARITHMETIC):
    #    exception = ArithmeticError
    #elif yasm_error_matches(YASM_ERROR_ASSERTION):
    #    exception = AssertionError
    #elif yasm_error_matches(YASM_ERROR_VALUE):
    #    exception = ValueError # include notabs, notconst, toocomplex
    #elif yasm_error_matches(YASM_ERROR_IO):
    #    exception = IOError
    #elif yasm_error_matches(YASM_ERROR_NOT_IMPLEMENTED):
    #    exception = NotImplementedError
    #elif yasm_error_matches(YASM_ERROR_TYPE):
    #    exception = TypeError
    #elif yasm_error_matches(YASM_ERROR_SYNTAX):
    #    exception = SyntaxError #include parse
    else:
        exception = YasmError

    # retrieve info (clears error)
    yasm_error_fetch(&errclass, &errstr, &xrefline, &xrefstr)

    if xrefline and xrefstr:
        # xrefline is an unsigned long; pass it with a matching long
        # conversion ("%d" would read it as an int through the varargs).
        PyErr_Format(exception, "%s: %ld: %s", errstr, <long>xrefline, xrefstr)
    else:
        PyErr_SetString(exception, errstr)

    # Both strings are freed here after being copied into the exception.
    if xrefstr: free(xrefstr)
    free(errstr)
    return 1
cdef extern from *:
    # Defined as a macro, so not automatically brought in by pyxelator
    cdef yasm_expr *yasm_expr_simplify(yasm_expr *e, int calc_bc_dist)

# Lookup table from Python-level operator spellings (functions from the
# operator module and, for most entries, a symbol string) to the
# corresponding YASM_EXPR_* operation.  Expression.__cinit__ consults it so
# callers may pass any of these spellings as the operator.
import operator
__op = {}
for ops, operation in [
    ((operator.__add__, operator.add, '+'), YASM_EXPR_ADD),
    ((operator.__and__, operator.and_, '&'), YASM_EXPR_AND),
    ((operator.__div__, operator.div, '/'), YASM_EXPR_SIGNDIV),
    ((operator.__floordiv__, operator.floordiv, '//'), YASM_EXPR_SIGNDIV),
    ((operator.__ge__, operator.ge, '>='), YASM_EXPR_GE),
    ((operator.__gt__, operator.gt, '>'), YASM_EXPR_GT),
    ((operator.__inv__, operator.inv, '~'), YASM_EXPR_NOT),
    ((operator.__invert__, operator.invert), YASM_EXPR_NOT),
    ((operator.__le__, operator.le, '<='), YASM_EXPR_LE),
    ((operator.__lt__, operator.lt, '<'), YASM_EXPR_LT),
    ((operator.__mod__, operator.mod, '%'), YASM_EXPR_SIGNMOD),
    ((operator.__mul__, operator.mul, '*'), YASM_EXPR_MUL),
    ((operator.__neg__, operator.neg), YASM_EXPR_NEG),
    ((operator.__not__, operator.not_, 'not'), YASM_EXPR_LNOT),
    ((operator.__or__, operator.or_, '|'), YASM_EXPR_OR),
    ((operator.__sub__, operator.sub, '-'), YASM_EXPR_SUB),
    ((operator.__xor__, operator.xor, '^'), YASM_EXPR_XOR),
    ]:
    for op in ops:
        __op[op] = operation

# Remove the loop temporaries (and the operator module name) from the
# module namespace; only __op is kept.
del operator, op, ops, operation

# Wrap a raw yasm_expr pointer in an Expression; the pointer is handed over
# through __pass_voidp and adopted by Expression.__cinit__ without copying.
cdef object __make_expression(yasm_expr *expr):
    return Expression(__pass_voidp(expr, Expression))

# Python wrapper around a yasm_expr pointer.
cdef class Expression:
    cdef yasm_expr *expr

    # Construction accepts one of:
    #   - another Expression: its yasm_expr is copied;
    #   - a PyCObject: the wrapped pointer is adopted as-is;
    #   - an operator (looked up in __op, otherwise used unchanged) followed
    #     by one or two operands, plus an optional 'line' keyword (default 0).
    # Any other operand count raises NotImplementedError.
    def __cinit__(self, op, *args, **kwargs):
        self.expr = NULL

        if isinstance(op, Expression):
            self.expr = yasm_expr_copy((<Expression>op).expr)
            return
        if PyCObject_Check(op):
            self.expr = <yasm_expr *>__get_voidp(op, Expression)
            return

        cdef size_t numargs
        cdef unsigned long line

        op = __op.get(op, op)
        numargs = len(args)
        line = kwargs.get('line', 0)

        if numargs == 0 or numargs > 2:
            raise NotImplementedError
        elif numargs == 2:
            self.expr = yasm_expr_create(op, self.__new_item(args[0]),
                                         self.__new_item(args[1]), line)
        else:
            self.expr = yasm_expr_create(op, self.__new_item(args[0]), NULL,
                                         line)

    # Convert a Python operand into a yasm_expr__item.
    # Expression, FloatNum and IntNum operands are copied before being
    # wrapped.  Anything else is first converted with IntNum(value); a failed
    # conversion is reported as ValueError.
    cdef yasm_expr__item* __new_item(self, value) except NULL:
        cdef yasm_expr__item *retval
        if isinstance(value, Expression):
            return yasm_expr_expr(yasm_expr_copy((<Expression>value).expr))
        #elif isinstance(value, Symbol):
        #    return yasm_expr_sym((<Symbol>value).sym)
        #elif isinstance(value, Register):
        #    return yasm_expr_reg((<Register>value).reg)
        elif isinstance(value, FloatNum):
            return yasm_expr_float(yasm_floatnum_copy((<FloatNum>value).flt))
        elif isinstance(value, IntNum):
            return yasm_expr_int(yasm_intnum_copy((<IntNum>value).intn))
        else:
            try:
                intnum = IntNum(value)
            except:
                raise ValueError("Invalid item value type '%s'" % type(value))
            else:
                # Hand the temporary's pointer to the item and clear it on
                # the IntNum so the temporary does not keep referring to it.
                retval = yasm_expr_int((<IntNum>intnum).intn)
                (<IntNum>intnum).intn = NULL
                return retval

    def __dealloc__(self):
        if self.expr != NULL: yasm_expr_destroy(self.expr)

    # Replace the wrapped expression with the result of yasm_expr_simplify.
    def simplify(self, calc_bc_dist=False):
        self.expr = yasm_expr_simplify(self.expr, calc_bc_dist)

    # Return the result of yasm_expr_extract_segoff as a new Expression, or
    # raise ValueError when it yields NULL.  The call receives the address of
    # self.expr, so it can presumably rewrite this expression in place --
    # confirm against the libyasm expr.h documentation.
    def extract_segoff(self):
        cdef yasm_expr *retval
        retval = yasm_expr_extract_segoff(&self.expr)
        if retval == NULL:
            raise ValueError("not a SEG:OFF expression")
        return __make_expression(retval)

    # Same contract as extract_segoff, using yasm_expr_extract_wrt.
    def extract_wrt(self):
        cdef yasm_expr *retval
        retval = yasm_expr_extract_wrt(&self.expr)
        if retval == NULL:
            raise ValueError("not a WRT expression")
        return __make_expression(retval)

    # Return an IntNum holding a copy of the intnum reported by
    # yasm_expr_get_intnum, or raise ValueError when it yields NULL.
    def get_intnum(self, calc_bc_dist=False):
        cdef yasm_intnum *retval
        retval = yasm_expr_get_intnum(&self.expr, calc_bc_dist)
        if retval == NULL:
            raise ValueError("not an intnum expression")
        return __make_intnum(yasm_intnum_copy(retval))
cdef class FloatNum:
    # Python wrapper around a yasm_floatnum pointer.
    #
    # Construction accepts another FloatNum (copied), a PyCObject carrying a
    # yasm_floatnum pointer (adopted as-is), a Python float (converted via
    # its string form) or a string passed straight to yasm_floatnum_create.
    cdef yasm_floatnum *flt
    def __cinit__(self, value):
        self.flt = NULL
        if isinstance(value, FloatNum):
            self.flt = yasm_floatnum_copy((<FloatNum>value).flt)
            return
        if PyCObject_Check(value):  # should check Desc
            self.flt = <yasm_floatnum *>PyCObject_AsVoidPtr(value)
            return

        # Stringify the value itself; str(float) would yield the text of the
        # float *type* rather than the number.
        if isinstance(value, float): string = str(value)
        else: string = value
        self.flt = yasm_floatnum_create(string)

    def __dealloc__(self):
        if self.flt != NULL: yasm_floatnum_destroy(self.flt)

    def __neg__(self):
        # Negate a copy so the receiver is left untouched.
        result = FloatNum(self)
        yasm_floatnum_calc((<FloatNum>result).flt, YASM_EXPR_NEG, NULL)
        return result
    def __pos__(self): return self
cdef class IntNum

cdef object __intnum_op_ex(object x, yasm_expr_op op, object y):
    # Like __intnum_op, but also raises any error libyasm recorded during the
    # calculation (used for division and modulo).
    value = __intnum_op(x, op, y)
    __error_check()
    return value

cdef object __intnum_op(object x, yasm_expr_op op, object y):
    # Compute "x op y" into a new IntNum.  Either operand may be a plain
    # Python value as long as the other is an IntNum; y is None for unary
    # operations.
    if isinstance(x, IntNum):
        result = IntNum(x)
        if y is None:
            yasm_intnum_calc((<IntNum>result).intn, op, NULL)
        else:
            # Coerce to intnum if not already
            if isinstance(y, IntNum):
                rhs = y
            else:
                rhs = IntNum(y)
            yasm_intnum_calc((<IntNum>result).intn, op, (<IntNum>rhs).intn)
        return result
    elif isinstance(y, IntNum):
        # Reversed operation - x OP y still, just y is intnum, x isn't.
        result = IntNum(x)
        yasm_intnum_calc((<IntNum>result).intn, op, (<IntNum>y).intn)
        return result
    else:
        raise NotImplementedError

cdef object __make_intnum(yasm_intnum *intn):
    # Wrap a raw yasm_intnum pointer; the pointer is adopted, not copied.
    return IntNum(__pass_voidp(intn, IntNum))

cdef class IntNum:
    # Python wrapper around a yasm_intnum pointer.
    #
    # Construction accepts another IntNum (copied), a PyCObject carrying a
    # yasm_intnum pointer (adopted), a string with an optional base
    # (2, 8, 10, 16 or "nasm"), or a Python int/long.
    cdef yasm_intnum *intn

    def __cinit__(self, value, base=None):
        cdef unsigned char buf[16]

        self.intn = NULL

        if isinstance(value, IntNum):
            self.intn = yasm_intnum_copy((<IntNum>value).intn)
            return
        if PyCObject_Check(value):
            self.intn = <yasm_intnum *>__get_voidp(value, IntNum)
            return

        if isinstance(value, str):
            if base == 2:
                self.intn = yasm_intnum_create_bin(value)
            elif base == 8:
                self.intn = yasm_intnum_create_oct(value)
            elif base == 10 or base is None:
                self.intn = yasm_intnum_create_dec(value)
            elif base == 16:
                self.intn = yasm_intnum_create_hex(value)
            elif base == "nasm":
                self.intn = yasm_intnum_create_charconst_nasm(value)
            else:
                raise ValueError("base must be 2, 8, 10, 16, or \"nasm\"")
        elif isinstance(value, (int, long)):
            # Round-trip through a 16-byte buffer.
            _PyLong_AsByteArray(long(value), buf, 16, 1, 1)
            self.intn = yasm_intnum_create_sized(buf, 1, 16, 0)
        else:
            raise ValueError

    def __dealloc__(self):
        if self.intn != NULL: yasm_intnum_destroy(self.intn)

    def __long__(self):
        cdef unsigned char buf[16]
        yasm_intnum_get_sized(self.intn, buf, 16, 128, 0, 0, 0)
        return _PyLong_FromByteArray(buf, 16, 1, 1)

    def __repr__(self):
        return "IntNum(%d)" % self

    def __int__(self): return int(self.__long__())
    def __complex__(self): return complex(self.__long__())
    def __float__(self): return float(self.__long__())

    def __oct__(self): return oct(int(self.__long__()))
    def __hex__(self): return hex(int(self.__long__()))

    def __add__(x, y): return __intnum_op(x, YASM_EXPR_ADD, y)
    def __sub__(x, y): return __intnum_op(x, YASM_EXPR_SUB, y)
    def __mul__(x, y): return __intnum_op(x, YASM_EXPR_MUL, y)
    def __div__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNDIV, y)
    def __floordiv__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNDIV, y)
    def __mod__(x, y): return __intnum_op_ex(x, YASM_EXPR_SIGNMOD, y)
    def __neg__(self): return __intnum_op(self, YASM_EXPR_NEG, None)
    def __pos__(self): return self
    def __abs__(self):
        if yasm_intnum_sign(self.intn) >= 0: return IntNum(self)
        else: return __intnum_op(self, YASM_EXPR_NEG, None)
    def __nonzero__(self): return not yasm_intnum_is_zero(self.intn)
    def __invert__(self): return __intnum_op(self, YASM_EXPR_NOT, None)
    def __lshift__(x, y): return __intnum_op(x, YASM_EXPR_SHL, y)
    def __rshift__(x, y): return __intnum_op(x, YASM_EXPR_SHR, y)
    def __and__(x, y): return __intnum_op(x, YASM_EXPR_AND, y)
    def __or__(x, y): return __intnum_op(x, YASM_EXPR_OR, y)
    def __xor__(x, y): return __intnum_op(x, YASM_EXPR_XOR, y)

    cdef object __op(self, yasm_expr_op op, object x):
        # In-place "self op= x".  The error check mirrors __intnum_op_ex so
        # that e.g. an in-place division by zero is raised here instead of
        # lingering until some later, unrelated call.
        if isinstance(x, IntNum):
            rhs = x
        else:
            rhs = IntNum(x)
        yasm_intnum_calc(self.intn, op, (<IntNum>rhs).intn)
        __error_check()
        return self

    def __iadd__(self, x): return self.__op(YASM_EXPR_ADD, x)
    def __isub__(self, x): return self.__op(YASM_EXPR_SUB, x)
    def __imul__(self, x): return self.__op(YASM_EXPR_MUL, x)
    def __idiv__(self, x): return self.__op(YASM_EXPR_SIGNDIV, x)
    def __ifloordiv__(self, x): return self.__op(YASM_EXPR_SIGNDIV, x)
    # Signed modulo, matching __mod__ (and the signed division used above).
    def __imod__(self, x): return self.__op(YASM_EXPR_SIGNMOD, x)
    def __ilshift__(self, x): return self.__op(YASM_EXPR_SHL, x)
    def __irshift__(self, x): return self.__op(YASM_EXPR_SHR, x)
    def __iand__(self, x): return self.__op(YASM_EXPR_AND, x)
    def __ior__(self, x): return self.__op(YASM_EXPR_OR, x)
    def __ixor__(self, x): return self.__op(YASM_EXPR_XOR, x)

    def __cmp__(self, x):
        # Compare via the sign of (self - x), computed on a scratch copy.
        cdef yasm_intnum *t
        t = yasm_intnum_copy(self.intn)
        if isinstance(x, IntNum):
            rhs = x
        else:
            rhs = IntNum(x)
        yasm_intnum_calc(t, YASM_EXPR_SUB, (<IntNum>rhs).intn)
        result = yasm_intnum_sign(t)
        yasm_intnum_destroy(t)
        return result

    def __richcmp__(x, y, op):
        # Map the rich-comparison opcode (0..5) onto the yasm comparison op
        # and report whether the computed result is non-zero.
        cdef yasm_expr_op aop
        if op == 0: aop = YASM_EXPR_LT
        elif op == 1: aop = YASM_EXPR_LE
        elif op == 2: aop = YASM_EXPR_EQ
        elif op == 3: aop = YASM_EXPR_NE
        elif op == 4: aop = YASM_EXPR_GT
        elif op == 5: aop = YASM_EXPR_GE
        else: raise NotImplementedError
        v = __intnum_op(x, aop, y)
        return bool(not yasm_intnum_is_zero((<IntNum>v).intn))
class Node(node_module.Node):
    """Base class for the parse-tree nodes defined in this module."""

    def is_typedef(self):
        """Return 1 if any child Node reports itself as a typedef, else 0."""
        for x in self:
            if isinstance(x,Node):
                if x.is_typedef():
                    return 1
        return 0

    def psource(self):
        """Print the node's 'lines' attribute (if any) as '# ' comments."""
        if hasattr(self,'lines'):
            # str.join replaces string.join: the string module is never
            # imported in this file, so the old call raised NameError.
            print("# "+"\n# ".join(self.lines)+"\n")


###################################################################
#
###################################################################
#


class BasicType(Node):
    " int char short etc. "
    def __init__(self,name):
        Node.__init__(self,name)

class Qualifier(Node):
    """Type qualifier node; keeps the keyword in self.name."""
    def __init__(self,name):
        Node.__init__(self,name)
        self.name=name

class StorageClass(Node):
    """Storage-class node; keeps the keyword in self.name."""
    def __init__(self,name):
        Node.__init__(self,name)
        self.name=name

class Typedef(StorageClass):
    """The 'typedef' storage class."""
    def __init__(self,s='typedef'):
        Node.__init__(self,s)

class Ellipses(Node):
    """The '...' marker of a variadic parameter list."""
    def __init__(self,s='...'):
        Node.__init__(self,s)

class GCCBuiltin(BasicType):
    """A BasicType subclass with no behaviour of its own."""
    pass


class Identifier(Node):
    """A declared name; the name is the first item and also self.name."""
    def __init__(self,name="",*items):
        # The former "if name or 1" test was always true; the name is
        # always stored as the first item, even when empty.
        Node.__init__(self,name,*items)
        self.name=name

class Function(Node,Parser):
    """Function declarator; its items are the ParameterDeclaration nodes."""
    def __init__(self,*items):
        Node.__init__(self,*items)

    def parse(self,lexer,symbols):
        """Parse a parameter list up to and including the closing ')'.

        Parameters are parsed in a nested Symbols scope.
        """
        symbols = Symbols(symbols)
        if lexer.tok != ')':
            if not lexer.tok:
                self.parse_error(lexer)
            while lexer.tok != ')':
                node = ParameterDeclaration()
                node.parse(lexer,symbols)
                self.append( node )
                if lexer.tok != ')' and lexer.tok != ',':
                    self.parse_error(lexer)
                if lexer.tok == ',':
                    lexer.get_token()
        lexer.get_token() # read past the ')'


class Pointer(Node):
    """Pointer declarator node."""
    def __init__(self,*items):
        Node.__init__(self,*items)

class Array(Node,Parser):
    """Array declarator; its single item is the size text or None."""
    def __init__(self,*items):
        Node.__init__(self,*items)

    def parse(self,lexer,symbols):
        """Collect the tokens between '[' and ']' as one space-joined string."""
        lexer.get_token() # a number or ']'
        # XX
        # HACK HACK: constant c expressions can appear in here:
        # eg. [ 15 * sizeof (int) - 2 * sizeof (void *) ]
        # XX
        toks = []
        while lexer.tok != ']':
            toks.append( lexer.tok )
            lexer.get_token()
        child = " ".join(toks)
        if child == "":
            child = None
        self.append( child )
        lexer.get_token() # read past the ']'
class Tag(Node):
    """Tag name of a struct, union or enum (empty when anonymous)."""
    pass


class Compound(Node,Parser):
    "Struct or Union"

    def __init__(self,*items,**kw):
        Node.__init__(self,*items,**kw)

    def parse(self,lexer,symbols):
        """Parse an optional tag followed by an optional '{ ... }' field list.

        The first item appended is always a Tag; fields are parsed as
        StructDeclaration nodes in a nested Symbols scope.
        """
        symbols = Symbols(symbols)
        tag = "" # anonymous
        if lexer.tok != '{':
            tag = lexer.tok
            # Guard against an empty token (end of input): tag[0] would
            # raise IndexError instead of reporting a parse error.
            if not tag or not ( tag[0]=='_' or tag[0].isalpha() ):
                self.parse_error(lexer ,"expected tag, got '%s'"%tag )
            lexer.get_token()
        if tag:
            self.append(Tag(tag))
        else:
            self.append(Tag())
        self.tag = tag
        if lexer.tok == '{':
            fieldlist = []
            lexer.get_token()
            if lexer.tok != '}':
                if not lexer.tok: self.parse_error(lexer)
                while lexer.tok != '}':
                    node = StructDeclaration()
                    node.parse(lexer,symbols)
                    fieldlist.append( node )
            self += fieldlist
            lexer.get_token()
        if self.verbose:
            print("%s.__init__() #<--"%(self))

class Struct(Compound):
    """A struct specifier."""
    pass

class Union(Compound):
    """A union specifier."""
    pass

class Enum(Node,Parser):
    """An enum specifier; items are a Tag followed by Identifier nodes."""
    def __init__(self,*items,**kw):
        Node.__init__(self,*items,**kw)

    def parse(self,lexer,symbols):
        """Parse an optional tag followed by an optional '{ ... }' list.

        Each enumerator is registered in symbols; initializer expressions
        after '=' are skipped, not evaluated.
        """
        tag = "" # anonymous
        if lexer.tok != '{':
            tag = lexer.tok
            # Same empty-token guard as in Compound.parse.
            if not tag or not ( tag[0]=='_' or tag[0].isalpha() ):
                self.parse_error(lexer ,"expected tag, got '%s'"%tag )
            lexer.get_token()
        if tag:
            self.append(Tag(tag))
        else:
            self.append(Tag())
        self.tag = tag
        if lexer.tok == '{':
            lexer.get_token()
            if lexer.tok != '}': # XX dopey control flow
                if not lexer.tok: # XX dopey control flow
                    self.parse_error(lexer) # XX dopey control flow
                while lexer.tok != '}': # XX dopey control flow
                    if lexer.kind is not None:
                        self.expected_error(lexer ,"identifier" )
                    ident = Identifier(lexer.tok)
                    if symbols[ident[0]] is not None:
                        self.parse_error(lexer,"%s already defined."%ident[0])
                    symbols[ident[0]]=ident
                    self.append( ident )
                    lexer.get_token()
                    if lexer.tok == '=':
                        lexer.get_token()
                        # ConstantExpr
                        # XX hack hack XX: skip the initializer tokens
                        while lexer.tok!=',' and lexer.tok!='}':
                            lexer.get_token()
                    if lexer.tok != '}':
                        if lexer.tok != ',':
                            self.expected_error(lexer,"}",",")
                        lexer.get_token() # ','
            lexer.get_token()
        if self.verbose:
            print("%s.__init__() #<--"%(self))



class Declarator(Node,Parser):
    """A declarator: an identifier plus pointer/array/function items."""
    def __init__(self,*items):
        Node.__init__(self,*items)
        self.ident = None

    def parse(self,lexer,symbols):
        """Parse one declarator, setting self.ident and self.name."""
        stack = []
        # read up to identifier, pushing tokens onto stack
        self.ident = self.parse_identifier(lexer,symbols,stack)
        self.name = ''
        if self.ident is not None:
            self.append( self.ident )
            self.name = self.ident.name
        # now read outwards from identifier
        self.parse_declarator(lexer,symbols,stack)

    def parse_identifier(self,lexer,symbols,stack):
        """Push tokens whose kind is not None onto stack, then read the name.

        Returns the Identifier, or None when the declarator starts at ';'
        or the input ends first.
        """
        if self.verbose:
            print("%s.parse_identifier()"%self)
        ident = None
        if lexer.tok != ';':
            while lexer.tok and lexer.kind is not None:
                stack.append( (lexer.tok, lexer.kind) )
                lexer.get_token()
            if lexer.tok:
                ident = Identifier( lexer.tok )
                lexer.get_token()
        if self.verbose:
            print("%s.parse_identifier()=%s"%(self,repr(ident)))
        return ident

    def parse_declarator(self,lexer,symbols,stack,level=0):
        """Read array/function suffixes, then unwind the prefix stack."""
        if self.verbose:
            print(" "*level+"%s.parse_declarator(%s) # --->"%\
                (self,stack))
        if lexer.tok == '[':
            while lexer.tok == '[':
                node = Array()
                node.parse(lexer,symbols)
                self.append(node)
            if lexer.tok == '(':
                self.parse_error(lexer ,"array of functions" )
        elif lexer.tok == '(':
            lexer.get_token()
            node = Function()
            node.parse(lexer,symbols)
            self.append( node )
            if lexer.tok == '(':
                self.parse_error(lexer ,"function returns a function" )
            if lexer.tok == '[':
                self.parse_error(lexer ,"function returns an array" )
        while stack:
            tok, kind = stack[-1] # peek
            if tok == '(':
                # a parenthesised group: match its ')' and keep reading
                # suffixes one level further out
                stack.pop()
                self.consume(lexer,')')
                self.parse_declarator(lexer,symbols,stack,level+1)
            elif tok == '*':
                stack.pop()
                self.append( Pointer() )
            else:
                tok, kind = stack.pop()
                self.append( kind )
        if self.verbose:
            print(" "*level+"%s.parse_declarator(%s) # <---"%\
                (self,stack))
class AbstractDeclarator(Declarator):
    """ used in ParameterDeclaration; may lack an identifier """

    def parse_identifier(self,lexer,symbols,stack):
        """Like Declarator.parse_identifier, but the name is optional.

        Stops at ')', ',' or '[' and then returns an empty Identifier.
        """
        if self.verbose:
            print("%s.parse_identifier()"%self)
        ident = Identifier()
        while 1:
            if lexer.tok == ';':
                self.parse_error(lexer)
            if lexer.tok == ')':
                break
            if lexer.tok == ',':
                break
            if lexer.tok == '[':
                break
            if lexer.kind is None:
                ident = Identifier( lexer.tok )
                lexer.get_token()
                break
            stack.append( (lexer.tok, lexer.kind) )
            lexer.get_token()
        if self.verbose:
            print("%s.parse_identifier()=%s"%(self,repr(ident)))
        return ident

class FieldLength(Node):
    """Bit-field width attached to a StructDeclarator."""
    pass

class StructDeclarator(Declarator):
    """Declarator inside a struct/union; may carry a ':' bit-field width."""
    def parse(self,lexer,symbols):
        if lexer.tok != ':':
            Declarator.parse(self,lexer,symbols)
        if lexer.tok == ':':
            lexer.get_token()
            # ConstantExpr: only a literal integer width is handled here
            length = int(lexer.tok)
            self.append( FieldLength(length) )
            lexer.get_token()

class DeclarationSpecifiers(Node,Parser):
    """The specifier part of a declaration (types, qualifiers, storage)."""
    def __init__(self,*items):
        Node.__init__(self,*items)

    def __eq__(self,other):
        " unordered (set/bag) equality "
        if not isinstance(other,Node):
            return 0
        for i in range(len(self)):
            if not self[i] in other:
                return 0
        for i in range(len(other)):
            if not other[i] in self:
                return 0
        return 1

    def parse(self,lexer,symbols):
        """Parse the specifiers, then reverse their stored order."""
        self.parse_spec(lexer,symbols)
        self.reverse()

    def parse_spec(self,lexer,symbols):
        """Consume specifier tokens until something else is seen.

        A second type name is reported as an error; struct, union and enum
        specifiers end the scan once parsed.
        """
        typespec = None
        while lexer.tok:
            if isinstance( lexer.kind, TypeAlias ) or\
                isinstance( lexer.kind, BasicType ):
                if typespec is not None:
                    self.parse_error(lexer ,"type already specified as %s"\
                        %typespec )
                typespec=lexer.kind
                self.append( lexer.kind )
                lexer.get_token()
            elif isinstance( lexer.kind, Qualifier ):
                self.append( lexer.kind )
                lexer.get_token()
            elif isinstance( lexer.kind, StorageClass ):
                self.append( lexer.kind )
                lexer.get_token()
            elif lexer.tok=='struct':
                lexer.get_token()
                self.parse_struct(lexer,symbols)
                break #?
            elif lexer.tok=='union':
                lexer.get_token()
                self.parse_union(lexer,symbols)
                break #?
            elif lexer.tok=='enum':
                lexer.get_token()
                self.parse_enum(lexer,symbols)
                break #?
            elif lexer.kind is None:
                # identifier
                break
            else:
                break

    def parse_struct(self,lexer,symbols):
        """Parse a struct specifier, check it against its tag, and append it."""
        if self.verbose:
            print("%s.parse_struct()"%(self))
        node = Struct()
        node.parse(lexer,symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag( node.tag )
        if _node is not None:
            if not isinstance( _node, Struct ):
                self.parse_error(lexer,"tag defined as wrong kind")
            # len > 1 means a body follows the Tag item: two bodies for the
            # same tag is a redefinition.
            if len(node)>1:
                if len(_node)>1:
                    self.parse_error(lexer,"tag already defined as %s"%_node)
        if node.tag:
            symbols.set_tag( node.tag, node )
        self.append( node )

    def parse_union(self,lexer,symbols):
        """Parse a union specifier, check it against its tag, and append it."""
        if self.verbose:
            # 'node' does not exist yet at this point; formatting it here
            # raised UnboundLocalError whenever verbose was set.
            print("%s.parse_union()"%(self))
        node = Union()
        node.parse(lexer,symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag( node.tag )
        if _node is not None:
            if not isinstance( _node, Union ):
                self.parse_error(lexer,"tag %s defined as wrong kind"%repr(node.tag))
            if len(node)>1:
                if len(_node)>1:
                    self.parse_error(lexer,"tag already defined as %s"%_node)
        if node.tag:
            symbols.set_tag( node.tag, node )
        self.append( node )

    def parse_enum(self,lexer,symbols):
        """Parse an enum specifier, check it against its tag, and append it."""
        if self.verbose:
            # see parse_union: 'node' is not assigned yet here
            print("%s.parse_enum()"%(self))
        node = Enum()
        node.parse(lexer,symbols)
        _node = None
        if node.tag:
            _node = symbols.get_tag( node.tag )
        if _node is not None:
            if not isinstance( _node, Enum ):
                self.parse_error(lexer,"tag defined as wrong kind")
            if len(node)>1:
                if len(_node)>1:
                    self.parse_error(lexer,"tag already defined as %s"%_node)
        if node.tag:
            symbols.set_tag( node.tag, node )
        self.append( node )

    def is_typedef(self):
        return self.find(Typedef) is not None

    def needs_declarator(self):
        """Return False when a struct, enum or union specifier is present."""
        for node in self:
            if isinstance( node, Struct ):
                return False
            if isinstance( node, Enum ):
                return False
            if isinstance( node, Union ):
                return False
        return True



class TypeSpecifiers(DeclarationSpecifiers):
    " used in ParameterDeclaration "

    def parse_spec(self,lexer,symbols):
        """As DeclarationSpecifiers.parse_spec, but rejects storage classes."""
        typespec = None
        while lexer.tok:
            if isinstance( lexer.kind, TypeAlias ) or\
                isinstance( lexer.kind, BasicType ):
                if typespec is not None:
                    self.parse_error(lexer ,"type already specified as %s"\
                        %typespec )
                typespec=lexer.kind
                self.append( lexer.kind )
                lexer.get_token()
            elif isinstance( lexer.kind, Qualifier ):
                self.append( lexer.kind )
                lexer.get_token()
            elif isinstance( lexer.kind, StorageClass ):
                self.parse_error(lexer ,"'%s' cannot appear here"%lexer.tok )
            elif lexer.tok=='struct':
                lexer.get_token()
                self.parse_struct(lexer,symbols)
                break #?
            elif lexer.tok=='union':
                lexer.get_token()
                self.parse_union(lexer,symbols)
                break #?
            elif lexer.tok=='enum':
                lexer.get_token()
                self.parse_enum(lexer,symbols)
                break #?
            elif lexer.kind is None:
                # identifier
                break
            else:
                break
already specified as %s"\ + %typespec ) + typespec=lexer.kind + self.append( lexer.kind ) + lexer.get_token() + elif isinstance( lexer.kind, Qualifier ): + self.append( lexer.kind ) + lexer.get_token() + elif isinstance( lexer.kind, StorageClass ): + self.parse_error(lexer ,"'%s' cannot appear here"%lexer.tok ) + elif lexer.tok=='struct': + lexer.get_token() + self.parse_struct(lexer,symbols) + break #? + elif lexer.tok=='union': + lexer.get_token() + self.parse_union(lexer,symbols) + break #? + elif lexer.tok=='enum': + lexer.get_token() + self.parse_enum(lexer,symbols) + break #? + elif lexer.kind is None: + # identifier + break + else: + break + + +class Initializer(Node,Parser): + """ + """ + def __init__(self,*items): + Node.__init__(self,*items) + + def parse(self,lexer,symbols): + self.parse_error(lexer,"not implemented") + + +class TypeAlias(Node): + " typedefed things " + + def __init__(self,name,decl=None): + Node.__init__(self,name)#,decl) + self.name=name + self.decl=decl + + +class Declaration(Node,Parser): + """ + """ + def __init__(self,*items): + Node.__init__(self,*items) + #self.acted=False + + def parse(self,lexer,symbols): + if not lexer.tok: + return + Parser.parse_enter(self,lexer) + declspec = DeclarationSpecifiers() + declspec.parse(lexer,symbols) + if len(declspec)==0: + if lexer.tok == ';': + lexer.get_token() + # empty declaration... 
+ return + self.parse_error(lexer, + "expected specifiers, got '%s'"%lexer.tok ) + self.append(declspec) + while 1: + decl = Declarator() + decl.parse(lexer,symbols) + if len(decl)==0: + if declspec.needs_declarator(): + self.parse_error(lexer, + "expected declarator, got '%s'"%lexer.tok ) + self.append(decl) + ident = decl.ident + if ident is not None: + #if len(ident): + # install symbol + node = symbols[ident[0]] + if node is not None: + # we allow functions to be defined (as same) again + #print node.deepstr(),'\n', self.deepstr() + _node = node.clone() + _node.delete(Identifier) + _self = self.clone() + _self.delete(Identifier) + if _node != _self: + self.parse_error(lexer, + "\n%s\n already defined as \n%s\n"%\ + (self.deepstr(),node.deepstr())) + else: + if self.is_typedef(): + #lexer.mktypedef( ident[0], self ) + tp = TypeAlias(ident[0],decl) + lexer.mktypedef( ident[0], tp ) + else: + symbols[ident[0]] = self + if lexer.tok == '=': + # parse initializer + lexer.get_token() + init = Initializer() + init.parse(lexer,symbols) + ident.append( init ) # as in Enum + #else: struct, union or enum + if lexer.tok == ';': + # no more declarators + break + if lexer.tok == '{': + # ! ahhh, function body !!! 
+# sys.stderr.write( +# "WARNING: function body found at line %s\n"%lexer.lno ) + bcount = 1 + while bcount: + lexer.get_brace_token() + if lexer.tok == '}': + bcount -= 1 + if lexer.tok == '{': + bcount += 1 + lexer.get_token() + Parser.parse_leave(self,lexer) + return + self.consume(lexer,',') + self.consume(lexer,';') + Parser.parse_leave(self,lexer) + + def is_typedef(self): + spec=self[0] + assert isinstance(spec,DeclarationSpecifiers), self.deepstr() + return spec.is_typedef() + + +class ParameterDeclaration(Declaration): + """ + """ + def parse(self,lexer,symbols): + typespec = TypeSpecifiers() + typespec.parse(lexer,symbols) + self.append(typespec) + decl = AbstractDeclarator() + decl.parse(lexer,symbols) + self.append(decl) + ident = decl.ident + if ident is not None and ident[0]: + node = symbols[ident[0]] + if node is not None: + self.parse_error(lexer, + "%s already defined as %s"%(ident,node)) + else: + symbols[ident[0]] = self + + +class StructDeclaration(Declaration): + """ + """ + def parse(self,lexer,symbols): + if not lexer.tok: + return + declspec = DeclarationSpecifiers() + declspec.parse(lexer,symbols) + self.append(declspec) + if len(declspec)==0: + if lexer.tok == ';': + lexer.get_token() + # empty declaration... 
+ return + self.parse_error(lexer, + "expected specifiers, got '%s'"%lexer.tok ) + while 1: + decl = StructDeclarator() + decl.parse(lexer,symbols) + if len(decl)==0: + self.parse_error(lexer, + "expected declarator, got '%s'"%lexer.tok ) + self.append(decl) + ident = decl.ident + if ident is not None: + node = symbols[ident[0]] + if node is not None: + self.parse_error(lexer , + "%s already defined as %s"%(ident,node)) + else: + if declspec.is_typedef(): + self.parse_error(lexer,"typedef in struct or union") + else: + symbols[ident[0]] = self + if lexer.tok == ';': + break + self.consume(lexer,',') + self.consume(lexer,';') + + +class TransUnit(Node,Parser): + """ + """ + def __init__(self,*items,**kw): + Node.__init__(self,*items,**kw) + + def parse(self,s,verbose=0): + self.symbols = Symbols() + self.lexer = Lexer(s,verbose=verbose) #,host=__module__) + node = None + while self.lexer.tok: + node=Declaration() + node.parse(self.lexer,self.symbols) + #sys.stderr.write( "# line %s\n"%self.lexer.lno ) + if node: + self.append(node) + #node.psource() + #print node.deepstr(),'\n' + #node.act() + + def strip(self,files): + " leave only the declarations from <files> " + i=0 + while i<len(self): + if self[i].file in files: + i=i+1 + else: + self.pop(i) + + def strip_filter(self,cb): + " leave only the declarations such that cb(file) " + i=0 + while i<len(self): + if cb(self[i].file): + i=i+1 + else: + self.pop(i) + + def assert_no_dups(self): + check={} + for node in self.nodes(): + assert not check.has_key(id(node)) + check[id(node)]=1 + + + +try: + import NoModule + import psyco + from psyco.classes import * +except ImportError: + class _psyco: + def jit(self): pass + def bind(self, f): pass + def proxy(self, f): return f + psyco = _psyco() +psyco.bind( Lexer.get_token ) +psyco.bind( Node ) + +def run0(): + verbose = 0 + if not sys.argv[1:]: + s = sys.stdin.read() + if sys.argv[1:]: + s = sys.argv[1] + #if sys.argv[2:]: + #verbose = int(sys.argv[2]) + if 0: + import 
profile + profile.run('TransUnit(s)','prof.out') + import pstats + p=pstats.Stats('prof.out') + p.strip_dirs().sort_stats(-1).print_stats() + else: + node = TransUnit(verbose = 1 ) + node.parse(s) + node.act(1,1,1) + +def run1(): + cstr = "char *(*)() ," + node = AbstractDeclarator() + node.parse( Lexer(cstr,True), Symbols() ) + print node.deepstr() + +if __name__=="__main__": + pass + + diff --git a/tools/python-yasm/pyxelator/genpyx.py b/tools/python-yasm/pyxelator/genpyx.py new file mode 100755 index 0000000..3f2a4cc --- /dev/null +++ b/tools/python-yasm/pyxelator/genpyx.py @@ -0,0 +1,530 @@ +#!/usr/bin/env python +""" genpyx.py - parse c declarations + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +This is a module of mixin classes for ir.py . + +Towards the end of ir.py our global class definitions +are remapped to point to the class definitions in ir.py . +So, for example, when we refer to Node we get ir.Node . + +""" + +import sys +from datetime import datetime + +# XX use this Context class instead of all those kw dicts !! XX +class Context(object): + " just a record (struct) " + def __init__( self, **kw ): + for key, value in kw.items(): + setattr( self, key, value ) + def __getattr__( self, name ): + return None # ? 
+ def __getitem__( self, name ): + return getattr(self, name) + +class OStream(object): + def __init__( self, filename=None ): + self.filename = filename + self.tokens = [] + self._indent = 0 + def put( self, token="" ): + assert type(token) is str + self.tokens.append( token ) + def startln( self, token="" ): + assert type(token) is str + self.tokens.append( ' '*self._indent + token ) + def putln( self, ln="" ): + assert type(ln) is str + self.tokens.append( ' '*self._indent + ln + '\n') + def endln( self, token="" ): + assert type(token) is str + self.tokens.append( token + '\n') + def indent( self ): + self._indent += 1 + def dedent( self ): + self._indent -= 1 + assert self._indent >= 0, self._indent + def join( self ): + return ''.join( self.tokens ) + def close( self ): + s = ''.join( self.tokens ) + f = open( self.filename, 'w' ) + f.write(s) + +# +############################################################################### +# + +class Node(object): + """ + tree structure + """ + _unique_id = 0 + def get_unique_id(cls): + Node._unique_id += 1 + return Node._unique_id + get_unique_id = classmethod(get_unique_id) + +# XX toks: use a tree of tokens: a list that can be push'ed and pop'ed XX + def pyxstr(self,toks=None,indent=0,**kw): + """ + Build a list of tokens; return the joined tokens string + """ + if toks is None: + toks = [] + for x in self: + if isinstance(x,Node): + x.pyxstr(toks, indent, **kw) + else: + toks.insert(0,str(x)+' ') + s = ''.join(toks) + return s + +# +################################################# + +class Named(object): + "has a .name property" + pass + +class BasicType(object): + "float double void char int" + pass + +class Qualifier(object): + "register signed unsigned short long const volatile inline" + def pyxstr(self,toks=None,indent=0,**kw): + if toks is None: + toks = [] + x = self[0] + if x not in ( 'const','volatile','inline','register'): # ignore these + toks.insert(0,str(x)+' ') + s = ''.join(toks) + return s + +class 
StorageClass(object): + "extern static auto" + def pyxstr(self,toks=None,indent=0,**kw): + return "" + +class Ellipses(object): + "..." + pass + +class GCCBuiltin(BasicType): + "things with __builtin prefix" + pass + +class Identifier(object): + """ + """ + def pyxstr(self,toks=None,indent=0,**kw): + if toks is None: + toks=[] + if self.name: + toks.append( self.name ) + return " ".join(toks) + +class TypeAlias(object): + """ + typedefed things, eg. size_t + """ + def pyxstr(self,toks=None,indent=0,cprefix="",**kw): + if toks is None: + toks = [] + for x in self: + if isinstance(x,Node): + x.pyxstr(toks, indent, cprefix=cprefix, **kw) + else: + s = str(x)+' ' + if cprefix: + s = cprefix+s + toks.insert(0,s) + s = ''.join(toks) + return s + +class Function(object): + """ + """ + def pyxstr(self,toks,indent=0,**kw): + #print '%s.pyxstr(%s)'%(self,toks) + _toks=[] + assert len(self) + i=0 + while isinstance(self[i],Declarator): + if not self[i].is_void(): + _toks.append( self[i].pyxstr(indent=indent, **kw) ) + i=i+1 + toks.append( '(%s)'% ', '.join(_toks) ) + while i<len(self): + self[i].pyxstr(toks, indent=indent, **kw) + i=i+1 + return " ".join(toks) + +class Pointer(object): + """ + """ + def pyxstr(self,toks,indent=0,**kw): + assert len(self) + node=self[0] + toks.insert(0,'*') + if isinstance(node,Function): + toks.insert(0,'(') + toks.append(')') + elif isinstance(node,Array): + toks.insert(0,'(') + toks.append(')') + return Node.pyxstr(self,toks,indent, **kw) + +class Array(object): + """ + """ + def pyxstr(self,toks,indent=0,**kw): + if self.size is None: + toks.append('[]') + else: + try: + int(self.size) + toks.append('[%s]'%self.size) + except: + toks.append('[]') + return Node( *self[:-1] ).pyxstr( toks,indent, **kw ) + +class Tag(object): + " the tag of a Struct, Union or Enum " + pass + +class Taged(object): + "Struct, Union or Enum " + pass + +class Compound(Taged): + "Struct or Union" + def 
pyxstr(self,_toks=None,indent=0,cprefix="",shadow_name=True,**kw): + if _toks is None: + _toks=[] + names = kw.get('names',{}) + kw['names'] = names + tag_lookup = kw.get('tag_lookup') + if self.tag: + tag=self.tag.name + else: + tag = '' + if isinstance(self,Struct): + descr = 'struct' + elif isinstance(self,Union): + descr = 'union' + _node = names.get(self.tag.name,None) + if ( _node is not None and _node.has_members() ) or \ + ( _node is not None and not self.has_members() ): + descr = '' # i am not defining myself here + #print "Compound.pyxstr", tag + #print self.deepstr() + if descr: + if cprefix and shadow_name: + tag = '%s%s "%s"'%(cprefix,tag,tag) + elif cprefix: + tag = cprefix+tag + toks = [ descr+' '+tag ] # struct foo + if self.has_members(): + toks.append(':\n') + for decl in self[1:]: # XX self.members + toks.append( decl.pyxstr(indent=indent+1, cprefix=cprefix, shadow_name=shadow_name, **kw)+"\n" ) # shadow_name = False ? + #elif not tag_lookup.get( self.tag.name, self ).has_members(): + # define empty struct here, it's the best we're gonna get + #pass + else: + if cprefix: # and shadow_name: + tag = cprefix+tag + toks = [ ' '+tag+' ' ] # foo + while toks: + _toks.insert( 0, toks.pop() ) + return "".join( _toks ) + +class Struct(Compound): + """ + """ + pass + +class Union(Compound): + """ + """ + pass + + +class Enum(Taged): + """ + """ + def pyxstr(self,_toks=None,indent=0,cprefix="",shadow_name=True,**kw): + if _toks is None: + _toks=[] + names = kw.get('names',{}) + kw['names'] = names + if self.tag: + tag=self.tag.name + else: + tag = '' + _node = names.get(self.tag.name,None) + if ( _node is not None and _node.has_members() ) or \ + ( _node is not None and not self.has_members() ): + descr = '' # i am not defining myself here + else: + descr = 'enum' + if descr: + #if not names.has_key(self.tag.name): + toks = [ descr+' '+tag ] # enum foo + toks.append(':\n') + idents = [ ident for ident in self.members if ident.name not in names ] + for 
ident in idents: + if cprefix and shadow_name: + ident = ident.clone() + ident.name = '%s%s "%s"' % ( cprefix, ident.name, ident.name ) + #else: assert 0 + toks.append( ' '+' '*indent + ident.pyxstr(**kw)+"\n" ) + names[ ident.name ] = ident + if not idents: + # empty enum def'n ! + #assert 0 # should be handled by parents... + toks.append( ' '+' '*indent + "pass\n" ) + else: + toks = [ ' '+tag+' ' ] # foo + while toks: + _toks.insert( 0, toks.pop() ) + return "".join( _toks ) + +class Declarator(object): + def is_pyxnative( self ): + # pyrex handles char* too + # but i don't know if we should make this the default + # sometimes we want to send a NULL, so ... XX + self = self.cbasetype() # WARNING: cbasetype may be cached + if self.is_void(): + return False + if self.is_primative(): + return True + if self.enum: + return True + #pointer = None + #if self.pointer: + #pointer = self.pointer + #elif self.array: + #pointer = self.array + #if pointer and pointer.spec: + #spec = pointer.spec + #if BasicType("char") in spec and not Qualifier("unsigned") in spec: + # char*, const char* + ##print self.deepstr() + #return True + return False + + def _pyxstr( self, toks, indent, cprefix, use_cdef, shadow_name, **kw ): + " this is the common part of pyxstr that gets called from both Declarator and Typedef " + names = kw.get('names',{}) # what names have been defined ? 
+ kw['names']=names + for node in self.nodes(): # depth-first + if isinstance(node,Taged): + #print "Declarator.pyxstr", node.cstr() + if not node.tag.name: + node.tag.name = "_anon_%s" % Node.get_unique_id() + _node = names.get(node.tag.name,None) + #tag_lookup = kw.get('tag_lookup') + #other = tag_lookup.get(node.tag.name, node) + #if ((_node is None and (not isinstance(other,Compound) or not other.has_members())) + # or node.has_members()): + if _node is None or node.has_members(): + # either i am not defined at all, or this is my _real_ definition + # emit def'n of this node + #if isinstance(self,Typedef): + #toks.append( ' '*indent + 'ctypedef ' + node.pyxstr(indent=indent, cprefix=cprefix, shadow_name=shadow_name, **kw).strip() ) + #else: + toks.append( ' '*indent + 'cdef ' + node.pyxstr(indent=indent, cprefix=cprefix, shadow_name=shadow_name, **kw).strip() ) + names[ node.tag.name ] = node + elif isinstance(node,GCCBuiltin) and node[0] not in names: + #toks.append( ' '*indent + 'ctypedef long ' + node.pyxstr(indent=indent, **kw).strip() + ' # XX ??' ) # XX ?? + toks.append( ' '*indent + 'struct __unknown_builtin ' ) + toks.append( ' '*indent + 'ctypedef __unknown_builtin ' + node.pyxstr(indent=indent, **kw).strip() ) + names[ node[0] ] = node + for idx, child in enumerate(node): + if type(child)==Array and not child.has_size(): + # mutate this mystery array into a pointer XX method: Array.to_pointer() + node[idx] = Pointer() + node[idx].init_from( child ) # warning: shallow init + node[idx].pop() # pop the size element + + def pyxstr(self,toks=None,indent=0,cprefix="",use_cdef=True,shadow_name=True,**kw): + " note: i do not check if my name is already in 'names' " + self = self.clone() # <----- NOTE + toks=[] + names = kw.get('names',{}) # what names have been defined ? 
+ kw['names']=names + + self._pyxstr( toks, indent, cprefix, use_cdef, shadow_name, **kw ) + + if self.name and not names.has_key( self.name ): + names[ self.name ] = self + if self.identifier is not None: + comment = "" + if self.name in python_kws: + comment = "#" + if cprefix and use_cdef and shadow_name: + # When we are defining this guy, we refer to it using the pyrex shadow syntax. + self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name ) + cdef = 'cdef ' + if not use_cdef: cdef = '' # sometimes we don't want the cdef (eg. in a cast) + # this may need shadow_name=False: + toks.append( ' '*indent + comment + cdef + Node.pyxstr(self,indent=indent, cprefix=cprefix, **kw).strip() ) # + "(cprefix=%s)"%cprefix) + #else: i am just a struct def (so i already did that) # huh ?? XX bad comment + return ' \n'.join(toks) + + def pyxsym(self, ostream, names=None, tag_lookup=None, cprefix="", modname=None, cobjects=None): + assert self.name is not None, self.deepstr() + ostream.putln( '# ' + self.cstr() ) +# This cdef is no good: it does not expose a python object +# and we can't reliably set a global var + #ostream.putln( 'cdef %s %s' % ( self.pyx_adaptor_decl(cobjects), self.name ) ) # _CObject + #ostream.putln( '%s = %s()' % (self.name, self.pyx_adaptor_name(cobjects)) ) + #ostream.putln( '%s.p = <void*>&%s' % (self.name, cprefix+self.name) ) + ## expose a python object: + #ostream.putln( '%s.%s = %s' % (modname,self.name, self.name) ) + ostream.putln( '%s = %s( addr = <long>&%s )' % (self.name, self.pyx_adaptor_name(cobjects), cprefix+self.name) ) + return ostream + + +class Typedef(Declarator): + def pyxstr(self,toks=None,indent=0,cprefix="",use_cdef=True,shadow_name=True,**kw): # shadow_name=True + " warning: i do not check if my name is already in 'names' " + assert shadow_name == True + self = self.clone() # <----- NOTE + toks=[] + names = kw.get('names',{}) # what names have been defined ? 
+ kw['names']=names + + #if self.tagged and not self.tagged.tag.name: + ## "typedef struct {...} foo;" => "typedef struct foo {...} foo;" + ## (to be emitted in the node loop below, and suppressed in the final toks.append) + #self.tagged.tag = Tag( self.name ) # this is how pyrex does it: tag.name == self.name + # XX that doesn't work (the resulting c fails to compile) XX + + self._pyxstr( toks, indent, cprefix, use_cdef, shadow_name, **kw ) + + #print self.deepstr() + if self.name and not names.has_key( self.name ): + names[ self.name ] = self + if not (self.tagged and self.name == self.tagged.tag.name): + comment = "" + if self.name in python_kws: + comment = "#" + #if cprefix: + # self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name ) # XX pyrex can't do this + if cprefix: # shadow_name=True + # My c-name gets this prefix. See also TypeAlias.pyxstr(): it also prepends the cprefix. + self.name = '%s%s "%s" ' % ( cprefix, self.name, self.name ) + toks.append( ' '*indent + comment + 'ctypedef ' + Node.pyxstr(self,indent=indent, cprefix=cprefix, **kw).strip() ) + return ' \n'.join(toks) + + +class AbstractDeclarator(Declarator): + """ used in Function; may lack an identifier """ + def pyxstr(self,toks=None,indent=0,**kw): + if self.name in python_kws: + # Would be better to do this in __init__, but our subclass doesn't call our __init__. 
+ self.name = '_' + self.name + #return ' '*indent + Node.pyxstr(self,toks,indent, **kw).strip() + return Node.pyxstr(self,toks,indent, **kw).strip() + + +class FieldLength(object): + """ + """ + def pyxstr(self,toks,indent,**kw): + pass + + +class StructDeclarator(Declarator): # also used in Union + """ + """ + def pyxstr(self,toks=None,indent=0,**kw): + comment = "" + if self.name in python_kws: + comment = "#" + return ' '*indent + comment + Node.pyxstr(self,toks,indent, **kw).strip() + +class DeclarationSpecifiers(object): + """ + """ + pass + +class TypeSpecifiers(DeclarationSpecifiers): + """ + """ + pass + +class Initializer(object): + """ + """ + pass + +class Declaration(object): + """ + """ + pass + +class ParameterDeclaration(Declaration): + """ + """ + pass + +class StructDeclaration(Declaration): + """ + """ + pass + +class TransUnit(object): + """ + Top level node. + """ + def pyx_decls(self, filenames, modname, macros = {}, names = {}, func_cb=None, cprefix="", **kw): + # PART 1: emit extern declarations + ostream = OStream() + now = datetime.today() + ostream.putln( now.strftime('# Code generated by pyxelator on %x at %X') + '\n' ) + ostream.putln("# PART 1: extern declarations") + for filename in filenames: + ostream.putln( 'cdef extern from "%s":\n pass\n' % filename ) + ostream.putln( 'cdef extern from *:' ) + file = None # current file + for node in self: + ostream.putln('') + ostream.putln(' # ' + node.cstr() ) + assert node.marked + comment = False + if node.name and node.name in names: + comment = True # redeclaration + #ostream.putln( node.deepstr( comment=True ) ) + s = node.pyxstr(indent=1, names=names, tag_lookup = self.tag_lookup, cprefix=cprefix, **kw) + if s.split(): + if comment: + s = "#"+s.replace( '\n', '\n#' ) + " # redeclaration " + if node.file != file: + file = node.file + #ostream.putln( 'cdef extern from "%s":' % file ) + ostream.putln( ' # "%s"' % file ) + ostream.putln( s ) + ostream.putln('\n') + #s = '\n'.join(toks) + 
return ostream.join() + +# XX warn when we find a python keyword XX +python_kws = """ +break continue del def except exec finally pass print raise +return try global assert lambda yield +for while if elif else and in is not or import from """.split() +python_kws = dict( zip( python_kws, (None,)*len(python_kws) ) ) + + diff --git a/tools/python-yasm/pyxelator/ir.py b/tools/python-yasm/pyxelator/ir.py new file mode 100755 index 0000000..cfa9c02 --- /dev/null +++ b/tools/python-yasm/pyxelator/ir.py @@ -0,0 +1,1163 @@ +#!/usr/bin/env python +""" ir.py - parse c declarations + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +""" + +import sys +#import cPickle as pickle +import pickle + +#from lexer import Lexer +from parse_core import Symbols #, Parser +import node as node_module +import cparse +import genpyx + +class Node(genpyx.Node, node_module.Node): + """ + tree structure + """ + def __init__( self, *args, **kw ): + node_module.Node.__init__( self, *args, **kw ) + self._marked = False + def get_marked( self ): + return self._marked + def set_marked( self, marked ): +# if marked: +# print "MARK", self + self._marked = marked + marked = property( get_marked, set_marked ) + +# def __getstate__( self ): +# return self.__class__, tuple( [ item.__getstate__() for item in self ] ) +# def __setstate__( self, state ): +# cls, states = state +# states = list(states) +# for idx, state in enumerate(states): +# items[idx] = items[idx].__setstate__( + def __getstate__(self): + return str(self) + def __setstate__(self, state): + Node.__init__(self) + self[:] = eval(state) + +# _unique_id = 0 +# def get_unique_id(cls): +# Node._unique_id += 1 +# return Node._unique_id +# get_unique_id = classmethod(get_unique_id) + + def __hash__( self ): + return hash( tuple([hash(type(self))]+[hash(item) for item in self]) ) + + def clone(self): + l = [] + for item in self: + if isinstance(item,Node): + item = item.clone() + 
l.append(item) + return self.__class__(*l, **self.__dict__) + + def init_from( self, other ): # class method ? + # Warning: shallow init + self[:] = other + self.__dict__.update( other.__dict__ ) + return self + +# def is_struct(self): +# for x in self: +# if isinstance(x,Node): +# if x.is_struct(): +# return 1 +# return 0 + + + #def explain(self): + #l = [] + #for x in self: + #if isinstance(x,Node): + #l.append(x.explain()) + #else: + #l.append(str(x)) + #return string.join(l," ") + ##(self.__class__.__name__,string.join(l) ) + + def psource(self): + if hasattr(self,'lines'): +# print "# "+string.join(self.lines,"\n# ")+"\n" + print "# "+"\n# ".join(self.lines)+"\n" + + def cstr(self,l=None): + """ + Build a list of tokens; return the joined tokens string + """ + if l is None: + l = [] + for x in self: + if isinstance(x,Node): + x.cstr(l) + else: + l.insert(0,str(x)+' ') + s = ''.join(l) + return s + + def ctype(self): # anon_clone + " return clone of self without identifiers " + #print "%s.ctype()"%self + l=[] + for x in self: + if isinstance(x,Node): + l.append(x.ctype()) + else: + l.append(x) + #print "%s.__class__(*%s)"%(self,l) + return self.__class__(*l, **self.__dict__) # XX **self.__dict__ ? + + def cbasetype(self): + " return ctype with all TypeAlias's replaced " + # WARNING: we cache results (so do not mutate self!!) + l=[] + for x in self: + if isinstance(x,Node): + l.append(x.cbasetype()) + else: + l.append(x) + #print "%s.__class__(*%s)"%(self,l) + return self.__class__(*l, **self.__dict__) # XX **self.__dict__ ? 
+ + def signature( self, tank=None ): + if tank is None: + tank = {} + for node in self.nodes(): + if not tank.has_key( type(node) ): + tank[ type(node) ] = {} + type(node).tank = tank[type(node)] + shape = tuple( [ type(_node).__name__ for _node in node ] ) + if not tank[type(node)].has_key(shape): + tank[type(node)][shape] = [] + tank[type(node)][shape].append( node ) + return tank + + def psig( self, tank=None ): + if tank is None: + tank = {} + tank = self.signature(tank) + for key in tank.keys(): + print key.__name__ + for shape in tank[key].keys(): + print " ", shape + +# +################################################# + +class Named(genpyx.Named, Node): + " has a .name property " + def get_name(self): + if self: + assert type(self[0])==str + return self[0] + return None + def set_name(self, name): + if self: + self[0] = name + else: + self.append(name) + name = property(get_name,set_name) + + +class BasicType(genpyx.BasicType, Named): + "float double void char int" + pass + +class Qualifier(genpyx.Qualifier, Named): + "register signed unsigned short long const volatile inline" + pass + +class StorageClass(genpyx.StorageClass, Named): + "extern static auto" + pass + +class Ellipses(genpyx.Ellipses, Named): + "..." + pass + +class GCCBuiltin(genpyx.GCCBuiltin, BasicType): + "things with __builtin prefix" + pass + +class Identifier(genpyx.Identifier, Named): + """ + shape = +( str, +ConstExpr ) + """ + #def explain(self): + #if len(self)==1: + #return "%s"%self.name + #else: + #return "%s initialized to %s"%(self.name, + #Node(self[1]).explain()) # will handle Initializer + +# def ctype(self): +# return self.__class__(*self[1:]) #.clone() ? 
+ +# def get_name(self): +# if self: +# return self[0] +# def set_name(self, name): +# if self: +# self[0] = name +# else: +# self.append(name) +# name = property(get_name,set_name) + + def cstr(self,l=None): + if l is None: + l=[] + if len(self)>1: + assert len(self)==2 + l.append( '%s = %s'%(self[0],self[1]) ) + elif len(self)==1: + l.append( str(self[0]) ) + return " ".join(l) + +class TypeAlias(genpyx.TypeAlias, Named): + """ + typedefed things, eg. size_t + + """ + def cbasetype( self ): + node = self.typedef.cbasetype().get_rest() + return node + +class Function(genpyx.Function, Node): + """ + """ + #def explain(self): + #if len(self): + #return "function (%s), returning"%\ + #", ".join( map(lambda x:x.explain(),self) ) + #else: + #return "function returning" + + def cstr(self,l): + #print '%s.cstr(%s)'%(self,l) + _l=[] + assert len(self) + i=0 + while isinstance(self[i],Declarator): + _l.append( self[i].cstr() ) + i=i+1 + l.append( '(%s)'% ', '.join(_l) ) + while i<len(self): + self[i].cstr(l) + i=i+1 + return " ".join(l) + + def return_type(self): + node = self[-1] + #assert isinstance(node,DeclarationSpecifiers) + return Declarator( Identifier(), node ) + ret = property(return_type) + + def get_args(self): + args = [ arg for arg in self[:-1] if not arg.is_void() ] + return args + args = property(get_args) + + def arg_types(self): + return [ AbstractDeclarator().init_from( arg.ctype() ) for arg in self[:-1]] + + def is_varargs(self): + for node in self.nodes(): + if isinstance(node,Ellipses) or 'va_list' in node: +# print self, 'is_varargs' + return True +# print self, 'is_varargs' + return False +# return fn.deepfind(Ellipses) or fn.deepfind('va_list') + + def ctype(self): + return Function(*self.arg_types()+[self[-1]]) # XX self[-1].ctype + + +class Pointer(genpyx.Pointer, Node): + """ + """ + def get_spec(self): + if type(self[0])==TypeSpecifiers: # isinstance ?? 
+ return self[0] + spec = property(get_spec) + + #def explain(self): + #return "pointer to" + + def cstr(self,l): + assert len(self) + node=self[0] + l.insert(0,'*') + if isinstance(node,Function): + l.insert(0,'(') + l.append(')') + elif isinstance(node,Array): + l.insert(0,'(') + l.append(')') + return Node.cstr(self,l) + +class Array(genpyx.Array, Node): + """ + """ + #def explain(self): + #s='' + #if len(self): + #if type(self[0])==int: + #s='0 to %s '%(self[0]-1) + #return "array %sof"%s + def has_size(self): + try: + int(self.size) + return True + except: + return False + + def get_size(self): + if type(self[-1])==str: + try: return int(self[-1]) + except: return self[-1] + return self[-1] # None + size = property(get_size) + + def get_spec(self): + if type(self[0])==TypeSpecifiers: # isinstance ?? + return self[0] + spec = property(get_spec) + + def to_pointer(self): + node = Pointer() + node.init_from( self.clone() ) + node.pop() # pop the size element + return node + + def cstr(self,l): + if self.size is None: + l.append('[]') + else: + l.append('[%s]'%self.size) + return Node( *self[:-1] ).cstr( l ) + +class Tag(genpyx.Tag, Named): + " the tag of a Struct, Union or Enum " + pass + +class Taged(genpyx.Taged, Node): + "Struct, Union or Enum " + def get_tag(self): + if len(self): + tag = self[0] + assert type(tag)==Tag # isinstance ?? + else: + tag = None + return tag + def set_tag(self,tag): + if len(self): + self[0] = tag + else: + self.append(tag) + tag = property( get_tag, set_tag ) + def has_members(self): + return len(self)>1 # more than just a tag + def get_members(self): + return self[1:] + members = property(get_members) # fields ? 
+ + def ctype(self): + if not self.tag.name: + #print "# WARNING : anonymous struct " # OK i think + return self.clone() +# self = self.clone() +# return self[:1] # just the tag + return self.__class__( self.tag, **self.__dict__ ) # just the Tag +# return self.__class__( *self, **self.__dict__ ) + + def cbasetype(self): + return self.ctype() # is this enough ??? +# return Node.cbasetype(self) # XX lookup my tag if i am empty ..? + + +class Compound(genpyx.Compound, Taged): + "Struct or Union" + + def cstr(self,_l=None): + assert isinstance( self[0], Tag ) + tag='' + if len(self[0]): + tag=' '+self[0][0] + if isinstance(self,Struct): + l=[ 'struct%s '%tag ] + elif isinstance(self,Union): + l=[ 'union%s '%tag ] + if len(self)>1: + l.append(' { ') + for decl in self[1:]: + l.append( decl.cstr()+"; " ) + l.append('} ') + if _l is None: + _l=[] + while l: + _l.insert( 0, l.pop() ) + # XX empty struct with no tag -> "struct" XX + return "".join( _l ) + + def ctype(self): + tp = Taged.ctype(self) + for i in range(1,len(tp)): + tp[i] = StructDeclarator().init_from( tp[i] ) + return tp + +class Struct(genpyx.Struct, Compound): + """ + """ + pass + + +class Union(genpyx.Union, Compound): + """ + """ + pass + + +class Enum(genpyx.Enum, Taged): + """ + """ + def cstr(self,_l=None): + assert isinstance( self[0], Tag ) + tag='' + if len(self[0]): + tag=' '+self[0][0] + l=[ 'enum%s '%tag ] + if len(self)>1: + l.append(' { ') + for node in self[1:]: + l.append( node.cstr()+', ' ) + l.append('} ') + if _l is None: + _l=[] + while l: + _l.insert( 0, l.pop() ) + return ''.join( _l ) + +class Declarator(genpyx.Declarator, Node): + """ + """ + + def __eq__(self,other): + " unordered equality " + # ordering sometimes gets lost when we do a cbasetype + if not isinstance(other,Node): + return False + a, b = self[:], other[:] + a.sort() + b.sort() + return a == b + + def __hash__( self ): + hs = [hash(item) for item in self] + hs.sort() + return hash( tuple([hash(type(self))]+hs) ) + + def 
transform(self): + return + + def get_identifier(self): + if len(self)>1: + return self[0] + def set_identifier(self, identifier): + if len(self)>1: + self[0] = identifier + else: + self.insert(0,identifier) + identifier = property(get_identifier,set_identifier) + + def get_spec(self): + spec = self[-1] + if type(spec)==TypeSpecifiers: # isinstance ?? + return spec + spec = property(get_spec) + + def get_type_alias(self): + if self.spec: + if isinstance(self.spec[0], TypeAlias): + return self.spec[0] + type_alias = property(get_type_alias) + + def get_tagged(self): + if self.spec: + return self.spec.tagged # i am a tagged + tagged = property(get_tagged) + + def get_compound(self): + if self.spec: + return self.spec.compound # i am a compound + compound = property(get_compound) + + def get_struct(self): + if self.spec: + return self.spec.struct # i am a struct + struct = property(get_struct) + + def get_union(self): + if self.spec: + return self.spec.union # i am a union + union = property(get_union) + + def get_enum(self): + if self.spec: + return self.spec.enum # i am an enum + enum = property(get_enum) + + def get_function(self): + if len(self)>1 and type(self[1])==Function: # isinstance ?? + return self[1] + function = property(get_function) + + def get_pointer(self): + if len(self)>1 and type(self[1])==Pointer: # isinstance ?? + return self[1] + pointer = property(get_pointer) + + def get_array(self): + if len(self)>1 and type(self[1])==Array: # isinstance ?? 
+ return self[1] + array = property(get_array) + + def get_name(self): + if self.identifier: + return self.identifier.name + def set_name(self, name): + assert self.identifier is not None + self.identifier.name = name + name = property(get_name, set_name) + + def get_rest(self): # XX needs a better name + if len(self)>1: + return self[1] + return self[0] + + def pointer_to( self ): + " return Declarator pointing to self's type " + decl = Declarator(Identifier(), Pointer(self.get_rest().clone())) + return decl + + def deref( self ): + " return (clone of) Declarator that self is pointing to " + node = self.ctype() # clone + pointer = node.pointer or node.array + assert pointer, "cannot dereference non-pointer" + node[1:2] = pointer + return node + + def is_void(self): + return self.spec and BasicType('void') in self.spec + + def is_pointer_to_fn(self): + return self.pointer and self.deref().function + + def is_pointer_to_char(self): +# return self.ctype() == TransUnit("char *a;").transform()[0].ctype() + node = self.pointer or self.array + if node: + spec = node.spec + if spec and BasicType('char') in spec and not BasicType('unsigned') in spec: + return True + return False + + def is_callback(self): + " i am a pointer to a function whose last arg is void* " + if self.is_pointer_to_fn(): + fn = self.deref().function + if fn.args: + arg = fn.args[-1] + if arg.pointer and arg.deref().is_void(): + return True + + def is_complete( self, tag_lookup ): + if self.tagged and self.tagged.tag.name in tag_lookup and not tag_lookup[self.tagged.tag.name].has_members(): + return False + return True + + def is_primative( self ): + "i am a char,short,int,float,double... " + spec = self.cbasetype().spec + return spec and spec.find(BasicType) + + def is_pyxnative( self ): + # pyrex handles char* too + # but i don't know if we should make this the default + # sometimes we want to send a NULL, so ... 
XXX + self = self.cbasetype() + if self.is_void(): + return False + if self.is_primative(): + return True + if self.enum: + return True +# pointer = None +# if self.pointer: +# pointer = self.pointer +# elif self.array: +# pointer = self.array +# if pointer and pointer.spec: +# spec = pointer.spec +# if BasicType("char") in spec and not Qualifier("unsigned") in spec: +# # char*, const char* +## print self.deepstr() +# return True + return False + + def cstr(self,l=None): + return Node.cstr(self,l).strip() + + def ctype(self): + decl=Declarator() + decl.init_from( self.clone() ) + decl.identifier = Identifier() + for i in range(1,len(decl)): + decl[i]=decl[i].ctype() + return decl + + def cbasetype(self): + # WARNING: we cache results (so do not mutate self!!) + try: + # this cache improves performance by 50% + return self.__cbasetype.clone() + except AttributeError: + pass + decl = self.ctype() # gets rid of Identifier names + for i, node in enumerate(decl): + decl[i] = decl[i].cbasetype() +# return decl.get_rest() + + done = False + while not done: + done = True + nodes = decl.deepfilter( TypeSpecifiers ) + for node in nodes: + if node.deepfind( TypeSpecifiers ) != node: + # this node has another TypeSpecifier; + decl.expose_node( node ) + done = False + break # start again... + + # each TypeSpecifier needs to absorb primitive siblings (StorageClass, BasicType etc.) + nodes = decl.deepfilter( TypeSpecifiers ) + for node in nodes: + parent = decl.get_parent(node) + i = 0 + while i < len(parent): + assert not type(parent[i]) in (TypeAlias, Enum, Struct, Union) + if type(parent[i]) in (StorageClass, BasicType, Qualifier): + node.append( parent.pop(i) ) + else: + i = i + 1 + + self.__cbasetype = decl.clone() + return decl + + def invalidate(self): + # flush cache, etc. 
+ try: + del self.__cbasetype + except AttributeError: + pass + + def declare_str(self,name): + " return c string declaring name with same type as self " + tp = self.ctype() + tp.name = name + return tp.cstr()+";" + +class Typedef(genpyx.Typedef, Declarator): + def cstr(self,l=None): + return 'typedef ' + Declarator.cstr(self,l) #.strip() + +class AbstractDeclarator(genpyx.AbstractDeclarator, Declarator): + """ used in Function; may lack an identifier """ + + #def cstr(self,l=None): + #return Node.cstr(self,l) + +# def ctype(self): +# # _type_ ignores the name of our identifier +# return Node.ctype(self) + +class FieldLength(genpyx.FieldLength, Node): + """ + """ + #def explain(self): + #return "" + + def cstr(self,l): + l.append(':%s'%self[0]) + +class StructDeclarator(genpyx.StructDeclarator, Declarator): # also used in Union + """ + """ + #def explain(self): + #flen = self.find(FieldLength) + #if flen is not None: + #i = self.index(flen) + #self.pop(i) + #s = Declarator.explain(self) + #self.insert(i,flen) + #width = flen[0] + #if width > 0: + #return s+" bitfield %s wide"%width + #else: + #return s+" alignment bitfield" + #else: + #return Declarator.explain(self) +# def ctype(self): +# return self + def get_field_length(self): + if len(self)>1 and isinstance( self[1], FieldLength ): + return self[1] + field_length = property(get_field_length) + + +class DeclarationSpecifiers(genpyx.DeclarationSpecifiers, Node): +#class TypeSpecifiers(Node): + """ + """ + def __eq__(self,other): + " unordered equality " + if not isinstance(other,Node): + return False + a, b = self[:], other[:] + a.sort() + b.sort() + return a == b + + def __hash__( self ): + hs = [hash(item) for item in self] + hs.sort() + return hash( tuple([hash(type(self))]+hs) ) + +# def is_struct(self): +# return self.find(Struct) is not None + + +class TypeSpecifiers(genpyx.TypeSpecifiers, DeclarationSpecifiers): + """ + """ + def get_tagged(self): + if self and isinstance(self[0],Taged): + return self[0] 
+ tagged = property(get_tagged) + + def get_compound(self): + if self and isinstance(self[0],Compound): + return self[0] + compound = property(get_compound) + + def get_struct(self): + if self and isinstance(self[0],Struct): + return self[0] + struct = property(get_struct) + + def get_union(self): + if self and isinstance(self[0],Union): + return self[0] + union = property(get_union) + + def get_enum(self): + if self and isinstance(self[0],Enum): + return self[0] + enum = property(get_enum) + + def cbasetype(self): + node = Node.cbasetype(self) +# node.expose( TypeSpecifiers ) +# if node.deepfind(TypeSpecifiers) != node: + return node + +class Initializer(genpyx.Initializer, Node): + """ + """ + pass + + + +class Declaration(genpyx.Declaration, Node): + """ + """ + def do_spec(self): + " distribute DeclarationSpecifiers over each Declarator " + spec=self[0] + assert isinstance(spec,DeclarationSpecifiers), spec.deepstr() + self.pop(0) + for declarator in self: + assert isinstance(declarator,Declarator) + #if isinstance(declarator,DeclarationSpecifiers #huh? + ##for node in spec: + ##declarator.append(node.clone()) + declarator.append(spec) + + def transform(self): + # children go first + for node in self.nodes(): + if isinstance(node,Declaration): + node.do_spec() + node.file = self.file # overkill ? + self.expose(Declaration) + + #def explain(self): + #return string.join([x.explain() for x in self],", ") + #return string.join(map(lambda x:x.explain(),self),", ") + + +class ParameterDeclaration(genpyx.ParameterDeclaration, Declaration): + """ + """ + pass + + +class StructDeclaration(genpyx.StructDeclaration, Declaration): + """ + """ + pass + + +class TransUnit(genpyx.TransUnit, Node): + """ + Top level node. + """ + def __init__( self, item ): # XX __init__ uses different signature ! 
XX + if type(item)==str: + node = cparse.TransUnit() + node.parse(item) + else: + node = item + assert isinstance( node, cparse.TransUnit ), str(node) + Node.__init__(self) + self[:] = [ self.convert(child) for child in node ] + self.__dict__.update( node.__dict__ ) + assert "name" not in node.__dict__ + + self.syms = {} # map identifier names to their Declarator's + self.typedefs = {} # map names to Typedef's + self.tag_lookup = {} # map struct, union, enum tags to Taged's + + # XX should call transform here XX + +# print self.deepstr() + def __getstate__( self ): + nodes = tuple( [ repr(node) for node in self ] ) + typedefs = tuple( [ (key,repr(val)) for key,val in self.typedefs.items() ] ) + return nodes, typedefs + def __setstate__( self, state ): + Node.__init__(self) + nodes, typedefs = state + nodes = [ eval(node) for node in nodes ] + self[:] = nodes + typedefs = [ (key,eval(val)) for key,val in typedefs ] + self.typedefs = dict(typedefs) + + def convert( self, node ): +# name = node.__class__.__name__ +# cls = globals()[ name ] + cls = cls_lookup[ type(node) ] + _node = cls() + for child in node: + if isinstance(child, node_module.Node): + child = self.convert( child ) + else: + assert child is None or type(child) in (str, int), type(child) + _node.append( child ) + _node.__dict__.update( node.__dict__ ) + return _node + + def strip(self,files): + " leave only the declarations from <files> " + i=0 + while i<len(self): + if self[i].file in files: + i=i+1 + else: + self.pop(i) + + def mark(self,cb,verbose=False): + " mark our child nodes such that cb(node).. mark dependants too. prune unmarked objects. 
" + # mark the nodes: + for node in self: + node.marked = cb(self, node) + if verbose and node.marked: + print '1:', node.cstr() + # propagate dependancy: + i=len(self) + while i: + i-=1 # we go backwards + for node in self[i].nodes(): # bottom-up search + if verbose and self[i].marked and not node.marked: + print '2:', str(node), '<--', self[i].cstr() + node.marked = self[i].marked or node.marked + if type(node)==TypeAlias: + if verbose and node.marked and not node.typedef.marked: + print '3:', node.typedef.cstr(), '<--', node.cstr() + node.typedef.marked = node.typedef.marked or node.marked + if isinstance(node, Taged): + if node.tag.name in self.tag_lookup: + _node = self.tag_lookup[ node.tag.name ] # look-up the def'n + if verbose and node.marked and not _node.marked: + print '4:', _node.cstr(), '<--', self[i].cstr() +# _node.marked = _node.marked or self[i].marked + _node.marked = _node.marked or node.marked +# else: +# # this guy has no tag +# print "lost tag:", self[i].cstr() + + # XX struct defs acquire marks from members, but XX + # XX ordinary definitions do not XX +# if node.marked and not self[i].marked: +# # one of my descendants is marked +# if verbose: +# print '5:', self[i].cstr(), '<--', node.cstr() +# self[i].marked = True +# if verbose: +# for node in self: +# print '-'*79 +# if node.enum: +# print str(node.marked) + ': ' + node.cstr() + # prune: + f = open(".tmp/pruned.txt","w") + f.write("// This file autogenerated by '%s' .\n"%__file__) + f.write("// List of functions pruned from parse tree, for various reasons.\n\n") + i=0 + while i<len(self): + if not self[i].marked: + if verbose: print 'pop:', self[i].cstr() + f.write( self[i].cstr() + "\n" ) + self.pop(i) +# elif self[i].compound: +# # XXXX for now, rip out all struct members XXXX +# self[i].compound[1:] = [] # XX encapsulation +# i = i + 1 + else: + i = i + 1 + for key, value in self.syms.items(): + if not value.marked: + del self.syms[key] + for key, value in self.typedefs.items(): + if 
not value.marked: + del self.typedefs[key] + for key, value in self.tag_lookup.items(): + if not value.marked: + del self.tag_lookup[key] +# sys.exit(1) + + def assert_no_dups(self): + check={} + for node in self.nodes(): + assert not check.has_key(id(node)) + check[id(node)]=1 + + def transform(self, verbose=False, test_parse=False, test_types=False ): + i=0 + while i < len(self): + if verbose: print "##"*25 + declaration=self[i] + + if verbose: declaration.psource() + if verbose: print declaration.deepstr(),'\n' + assert isinstance(declaration,Declaration) + if verbose: print "# expose declarators from declaration" + + # STAGE 1 + declaration.transform() + + if verbose: print declaration.deepstr(),'\n' + self[i:i+1] = declaration # expose declarators from declaration + + for j in range(len(declaration)): + declarator=self[i] + + assert isinstance(declarator,Declarator) + if verbose: print "# declarator.transform()" + + # STAGE 2 + declarator.transform() + + if verbose: print declarator.deepstr(),'\n' + if verbose: print "# self.visit_declarator(declarator)" + + # STAGE 3 + self[i] = declarator = self.visit_declarator(declarator) + + # STAGE 4 + if declarator.name: + if isinstance(declarator, Typedef): + if verbose: print "# typedef %s" % declarator.name + self.typedefs[ declarator.name ] = declarator + else: + if verbose: print "# sym %s" % declarator.name + self.syms[ declarator.name ] = declarator + + for node in declarator.nodes(): + if isinstance(node,Taged) and node.tag.name: + assert type(node.tag.name)==str, node.deepstr() + taged = self.tag_lookup.get( node.tag.name, None ) + if taged is None: + if verbose: print "# tag lookup %s = %s" % (declarator.name, node.tag.name) + self.tag_lookup[ node.tag.name ] = node + elif not taged.has_members(): + # this is (maybe) the definition of this tag + if verbose: print "# definition %s = %s" % (declarator.name, node.tag.name) + self.tag_lookup[ node.tag.name ] = node + + # Annotate the TypeAlias's + for node in 
declarator.deepfilter( TypeAlias ): + name = node[0] + assert type( name ) == str + node.typedef = self.typedefs[ name ] + + if verbose: print declarator.deepstr(),'\n' + #print declarator.ctype().deepstr(),'\n' + #assert declarator.clone() == declarator + + ################################################### + # TESTS: + if test_parse: + # test that parse of cstr gives same answer + cstr = declarator.cstr()+';\n' + if verbose: print '# '+cstr.replace('\n','\n# ') + #print + if isinstance(declarator,Typedef): + name = declarator[0][0] + assert type(name)==str + self.lexer.rmtypedef( name ) + declaration = cparse.Declaration() + self.lexer.lex( cstr ) + #print self.lexer.err_string() + declaration.parse( self.lexer, Symbols() ) # use new name-space + #declaration.parse( Lexer( cstr ), Symbols() ) + declaration = self.convert(declaration) + declaration.transform() + assert len(declaration)==1 + decl=declaration[0] + decl.transform() + decl = self.visit_declarator(decl) + if decl!=declarator: + if verbose: print "#???????????" 
+ if verbose: print decl.deepstr(),'\n\n' + #if verbose: print declaration.deepstr(),'\n\n' + #assert 0 + elif verbose: print '# OK\n' + + if test_types: + node = declarator.ctype() + declare_str= node.declare_str("my_name") + if verbose: print "# declarator.ctype() " + if verbose: print node.deepstr(),"\n" + if verbose: print "#",declare_str.replace('\n','\n# '), '\n' + + i=i+1 + return self + + def visit(self,node): + #print 'visit(%s)'%node + for _node in node: + if isinstance(_node,Declarator): + _node = self.visit_declarator(_node) # XX replace _node + elif isinstance(_node,Node): + _node = self.visit(_node) # XX replace _node + return node + + def visit_declarator(self,decl): + assert isinstance(decl,Declarator) + + # STAGE 3.a + tp = decl.deepfind(Typedef) + if tp is not None: + decl.deeprm(tp) + tp.init_from( decl ) # warning: shallow init + decl = tp + + # STAGE 3.b + i=len(decl) + # accumulate nodes (they become the children of decl) + children=[] + while i: + i=i-1 + node=decl.pop(i) + if isinstance(node,Declarator): + node = self.visit_declarator(node) # replace node + else: + node = self.visit(node) # replace node + if isinstance(node,Pointer): + node+=children + children=[node] + elif isinstance(node,Function): + node+=children + children=[node] + elif isinstance(node,Array): + while children: + node.insert(0,children.pop()) + children=[node] + # array size (if any) at end + #elif isinstance(node,Identifier): + #node+=children + #children=[node] + else: + # accumulate + children.insert(0,node) + decl[:]=children + return decl + + cstr = None + ctype = None + cbasetype = None + + +# remap the global class definitions in genpyx to +# point to the definitions in this module +gbl = globals() +for key, val in gbl.items(): + if type(val)==type: + if issubclass(val,Node): + setattr( genpyx, key, val ) +assert genpyx.Node == Node + +cls_lookup = { +# Node : Node , + cparse.BasicType : BasicType , + cparse.Qualifier : Qualifier , + cparse.StorageClass : 
StorageClass , + cparse.Ellipses : Ellipses , + cparse.GCCBuiltin : GCCBuiltin , + cparse.Identifier : Identifier , + cparse.TypeAlias : TypeAlias , + cparse.Function : Function , + cparse.Pointer : Pointer , + cparse.Array : Array , + cparse.Tag : Tag , + cparse.Compound : Compound , + cparse.Struct : Struct , + cparse.Union : Union , + cparse.Enum : Enum , + cparse.Declarator : Declarator , + cparse.Typedef : Typedef , + cparse.AbstractDeclarator : AbstractDeclarator , + cparse.FieldLength : FieldLength , + cparse.StructDeclarator : StructDeclarator , + cparse.DeclarationSpecifiers : TypeSpecifiers , + cparse.TypeSpecifiers : TypeSpecifiers , + cparse.Initializer : Initializer , + cparse.Declaration : Declaration , + cparse.ParameterDeclaration : ParameterDeclaration , + cparse.StructDeclaration : StructDeclaration , + cparse.TransUnit : TransUnit , +} + + diff --git a/tools/python-yasm/pyxelator/lexer.py b/tools/python-yasm/pyxelator/lexer.py new file mode 100755 index 0000000..c161219 --- /dev/null +++ b/tools/python-yasm/pyxelator/lexer.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python +""" cdecl.py - parse c declarations + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +""" + +import sys +import string +import types +import copy + +#from cparse import BasicType, Qualifier, StorageClass, Typedef, Ellipses, GCCBuiltin +#from cparse import * + +import cparse as host + +class LexError(Exception): + pass + +class Lexer(object): + def __init__(self,s="",verbose=0,**kw): + self.verbose = verbose + self.lookup = {} # a map for keywords and typedefs + for t in \ + "float double void char int".split(): + self.lookup[t] = host.BasicType( t ) + for t in \ + "register signed unsigned short long const volatile inline".split(): # inline here ??? 
+ self.lookup[t] = host.Qualifier( t ) + for t in "extern static auto".split(): + self.lookup[t] = host.StorageClass( t ) + self.lookup['typedef'] = host.Typedef() + #self.lookup['__inline__'] = host.GCCBuiltin('__inline__') + #self.lookup['__extension__'] = host.Qualifier('__extension__') + self.lookup['...'] = host.Ellipses() + if s: + self.lex(s) + for key in kw.keys(): + self.__dict__[key] = kw[key] + + def lex(self,s): + self.stack = None + self.lines = s.splitlines() + self.set_state("","",0,0) + self.so_file = "" + self._newline() + self.get_token() # start + + def mktypedef(self,tok,node): + if self.verbose: + print "%s.mktypedef(%s,%s)"%(self,tok,node) + self.lookup[ tok ] = node + + def rmtypedef(self,tok): + " used in round trip testing " +# print "# rmtypedef(%s)"%tok + assert isinstance( self.lookup[ tok ], host.Node ) # existance + del self.lookup[ tok ] + + def _get_kind(self,tok): + #print '_get_kind(%s)'%tok,self.lookup + try: + return self.lookup[tok] + #return self.lookup[tok].clone() + except KeyError: + if tok.startswith("__builtin"): + node = host.GCCBuiltin(tok) + self.lookup[tok] = node + return node + #elif tok in ( "__extension__", ): + #node = GCCBuiltin(tok) + #self.lookup[tok] = node + #return node + return None + + def _newline(self): + while self.lno < len(self.lines): + line = self.lines[self.lno] + if not line or line[0] != "#": + break + l = line.split('"') + assert len(l)>=2 + self.so_file = l[1] + #self.so_lno = int( l[0].split()[1] ) + #sys.stderr.write("# %s %s: %s\n"%(so_lno,so_file,l)) + self.lno+=1 + + def get_brace_token( self ): + self.push_state() + ident_chars0 = string.letters+"_" + ident_chars1 = string.letters+string.digits+"_" + tok, kind = "", "" + while self.lno < len(self.lines): + s = self.lines[self.lno] + i=self.col + while i < len(s): + if s[i] not in '{}': + i=i+1 + continue + else: + tok = s[i] + kind = tok + self.col = i+1 + break + # keep moving + #sys.stderr.write( "lexer ignoring '%s'\n"%s[i] ) + i=i+1 + 
if i==len(s): + # nothing found + assert tok == "" + self.col=0 + self.lno+=1 + self._newline() + else: + assert tok + break + self.set_state(tok,kind,self.lno,self.col) + + def get_token(self): + self.push_state() + ident_chars0 = string.letters+"_" + ident_chars1 = string.letters+string.digits+"_" + tok, kind = "", "" + while self.lno < len(self.lines): + s = self.lines[self.lno] + i=self.col + while i < len(s): + if s[i].isspace(): + i=i+1 + continue + #if s[i] in ident_chars0: + if s[i].isalpha() or s[i]=='_': + # identifier + j=i+1 + while j<len(s): + if s[j] in ident_chars1: + j=j+1 + else: + break + tok = s[i:j] + self.col = j + kind = self._get_kind(tok) + break + if s[i].isdigit() or \ + (i+1<len(s) and s[i] in '+-.' and s[i+1].isdigit()): + # number literal + is_float = s[i]=='.' + is_hex = s[i:i+2]=='0x' + if is_hex: + i=i+2 + assert s[i].isdigit() or s[i] in "abcdefABCDEF", self.err_string() + j=i+1 + while j<len(s): + #print "lex ",repr(s[i]),is_float + if s[j].isdigit() or (is_hex and s[j] in "abcdefABCDEF"): + j=j+1 + elif s[j]=='.' 
and not is_float: + assert not is_hex + j=j+1 + is_float=1 + else: + break + tok = s[i:j] + self.col = j + if is_float: + kind = float(tok) + elif is_hex: + kind = int(tok,16) + else: + kind = int(tok) + break + if s[i:i+3]=='...': + # ellipses + #sys.stderr.write( "ELLIPSES "+str(self.get_state()) ) + tok = s[i:i+3] + kind = self._get_kind(tok) + self.col = i+3 + break + if s[i] in '*/{}()[]:;,=+-~.<>|&': + tok = s[i] + kind = tok + self.col = i+1 + break + if s[i] == "'": + j = i+2 + while j<len(s) and s[j]!="'": + j+=1 + if j==len(s): + raise LexError( self.err_string() + "unterminated char constant" ) + tok = s[i:j+1] + self.col = j+1 + kind = s[i:j+1] + break + # keep moving + #sys.stderr.write( "lexer ignoring '%s'\n"%s[i] ) + sys.stderr.write( "lexer ignoring '%s' lno=%d\n"%(s[i],self.lno+1) ) + i=i+1 + # end while i < len(s) + if i==len(s): + # nothing found, go to next line + assert tok == "" + self.col=0 + self.lno+=1 + self._newline() + else: + # we got one + assert tok + break + # end while self.lno < len(self.lines): + self.set_state(tok,kind,self.lno,self.col) + + def err_string(self): + "Return helpful error string :)" + return self.lines[self.lno]+"\n"+" "*self.col+"^\n" + + def push_state(self): + self.stack = self.get_state() # a short stack :) + #self.stack.push( self.get_state() ) + + def unget_token(self): + assert self.stack is not None + self.set_state(*self.stack) + self.stack = None + + def set_state(self,tok,kind,lno,col): + if self.verbose: + print "tok,kind,lno,col = ",(tok,kind,lno,col) + self.tok = tok + self.kind = kind + self.lno = lno # line + self.col = col # column + + def get_state(self): + return self.tok,self.kind,self.lno,self.col + + def get_file(self): + return self.so_file + +################################################################### +# +################################################################### +# + + diff --git a/tools/python-yasm/pyxelator/node.py b/tools/python-yasm/pyxelator/node.py new file mode 
100755 index 0000000..5ce9043 --- /dev/null +++ b/tools/python-yasm/pyxelator/node.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python +""" cdecl.py - parse c declarations + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +""" + +import string + + +class Node(list): + " A node in a parse tree " + + def __init__(self,*items,**kw): + list.__init__( self, items ) + self.lock1 = 0 # these two should be properties (simplifies serializing) + self.lock2 = 0 + self.verbose = 0 + for key in kw.keys(): + self.__dict__[key] = kw[key] + + def __str__(self): + attrs = [] + for item in self: + if isinstance(item,Node): + attrs.append( str(item) ) + else: + attrs.append( repr(item) ) + attrs = ','.join(attrs) + return "%s(%s)"%(self.__class__.__name__,attrs) + + def safe_repr( self, tank ): + tank[ str(self) ] = None + attrs = [] + for item in self: + if isinstance(item,Node): + attrs.append( item.safe_repr(tank) ) # can we use repr here ? + else: + attrs.append( repr(item) ) + # this is the dangerous bit: + for key, val in self.__dict__.items(): + if isinstance(val,Node): + if str(val) not in tank: + attrs.append( '%s=%s'%(key,val.safe_repr(tank)) ) + else: + attrs.append( '%s=%s'%(key,repr(val)) ) + attrs = ','.join(attrs) + return "%s(%s)"%(self.__class__.__name__,attrs) + + def __repr__(self): + #attrs = ','.join( [repr(item) for item in self] + \ + # [ '%s=%s'%(key,repr(val)) for key,val in self.__dict__.items() ] ) + #return "%s%s"%(self.__class__.__name__,tuple(attrs)) + return self.safe_repr({}) + + def __eq__(self,other): + if not isinstance(other,Node): + return 0 + if len(self)!=len(other): + return 0 + for i in range(len(self)): + if not self[i]==other[i]: + return 0 + return 1 + + def __ne__(self,other): + return not self==other + + def filter(self,cls): + return [x for x in self if isinstance(x,cls)] + #return filter( lambda x:isinstance(x,cls), self ) + + def deepfilter(self,cls): + " bottom-up " + return 
[x for x in self.nodes() if isinstance(x,cls)] + + def find(self,cls): + for x in self: + if isinstance(x,cls): + return x + return None + + def deepfind(self,cls): + " bottom-up isinstance search " + for x in self: + if isinstance(x,Node): + if isinstance(x,cls): + return x + node = x.deepfind(cls) + if node is not None: + return node + if isinstance(self,cls): + return self + return None + + def leaves(self): + for i in self: + if isinstance( i, Node ): + for j in i.leaves(): + yield j + else: + yield i + + def nodes(self): + " bottom-up iteration " + for i in self: + if isinstance( i, Node ): + for j in i.nodes(): + yield j + yield self + + def deeplen(self): + i=0 + if not self.lock2: + self.lock2=1 + for item in self: + i+=1 + if isinstance(item,Node): + i+=item.deeplen() + self.lock2=0 + else: + i+=1 + return i + + def deepstr(self,level=0,comment=False,nl='\n',indent=' '): + if self.deeplen() < 4: + nl = ""; indent = "" + #else: + #nl="\n"; indent = " " + s = [] + if not self.lock1: + self.lock1=1 + for item in self: + if isinstance(item,Node): + s.append( indent*(level+1)+item.deepstr(level+1,False,nl,indent) ) + else: + s.append( indent*(level+1)+repr(item) ) + self.lock1=0 + else: + for item in self: + if isinstance(item,Node): + s.append( indent*(level+1)+"<recursion...>" ) + else: + s.append( indent*(level+1)+"%s"%repr(item) ) + s = "%s(%s)"%(self.__class__.__name__,nl+string.join(s,","+nl)) + if comment: + s = '#' + s.replace('\n','\n#') + return s + + def clone(self): + items = [] + for item in self: + if isinstance(item,Node): + item = item.clone() + items.append(item) + # we skip any attributes... + return self.__class__(*items) + + def fastclone(self): + # XX is it faster ??? 
+ #print "clone" + nodes = [self] + idxs = [0] + itemss = [ [] ] + while nodes: + assert len(nodes)==len(idxs)==len(itemss) + node = nodes[-1] + items = itemss[-1] + assert idxs[-1] == len(items) + while idxs[-1]==len(node): + # pop + _node = node.__class__( *items ) + _node.__dict__.update( node.__dict__ ) + nodes.pop(-1) + idxs.pop(-1) + itemss.pop(-1) + if not nodes: + #for node0 in self.nodes(): + #for node1 in _node.nodes(): + #assert node0 is not node1 + #assert _node == self + return _node # Done !! + node = nodes[-1] + items = itemss[-1] + items.append(_node) # set + idxs[-1] += 1 + assert idxs[-1] == len(items) + #assert idxs[-1] < len(node), str( (node,nodes,idxs,itemss) ) + + _node = node[ idxs[-1] ] + # while idxs[-1]<len(node): + if isinstance(_node,Node): + # push + nodes.append( _node ) + idxs.append( 0 ) + itemss.append( [] ) + else: + # next + items.append(_node) + idxs[-1] += 1 + assert idxs[-1] == len(items) + + def expose(self,cls): + ' expose children of any <cls> instance ' + # children first + for x in self: + if isinstance(x,Node): + x.expose(cls) + # now the tricky bit + i=0 + while i < len(self): + if isinstance(self[i],cls): + node=self.pop(i) + for x in node: + assert not isinstance(x,cls) + # pass on some attributes + if hasattr(node,'lines') and not hasattr(x,'lines'): + x.lines=node.lines + if hasattr(node,'file') and not hasattr(x,'file'): + x.file=node.file + self.insert(i,x) # expose + i=i+1 + assert i<=len(self) + else: + i=i+1 + + def get_parent( self, item ): # XX 25% CPU time here XX + assert self != item + if item in self: + return self + for child in self: + if isinstance(child, Node): + parent = child.get_parent(item) + if parent is not None: + return parent + return None + + def expose_node( self, item ): + assert self != item + parent = self.get_parent(item) + idx = parent.index( item ) + parent[idx:idx+1] = item[:] + + def delete(self,cls): + ' delete any <cls> subtree ' + for x in self: + if isinstance(x,Node): + 
x.delete(cls) + # now the tricky bit + i=0 + while i < len(self): + if isinstance(self[i],cls): + self.pop(i) + else: + i=i+1 + + def deeprm(self,item): + ' remove any items matching <item> ' + for x in self: + if isinstance(x,Node): + x.deeprm(item) + # now the tricky bit + i=0 + while i < len(self): + if self[i] == item: + self.pop(i) + else: + i=i+1 + + def idem(self,cls): + " <cls> is made idempotent " + # children first + for x in self: + if isinstance(x,Node): + x.idem(cls) + if isinstance(self,cls): + # now the tricky bit + i=0 + while i < len(self): + if isinstance(self[i],cls): + node = self.pop(i) + for x in node: + assert not isinstance(x,cls) + self.insert(i,x) # idempotent + i=i+1 + assert i<=len(self) + else: + i=i+1 + +if __name__=="__main__": + node = Node( 'a', Node(1,2), Node(Node(Node(),1)) ) + + print node + print node.clone() + + + + diff --git a/tools/python-yasm/pyxelator/parse_core.py b/tools/python-yasm/pyxelator/parse_core.py new file mode 100755 index 0000000..84fb894 --- /dev/null +++ b/tools/python-yasm/pyxelator/parse_core.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +""" cdecl.py - parse c declarations + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +""" + +import sys + + +class Symbols(object): + def __init__(self,parent=None,verbose=False): + self.verbose = verbose + self.parent=parent # are we a nested namespace? 
+ self.lookup = {} # identifiers + self.tags = {} # struct, union, enum tags + + def __str__(self): + return "Symbols(%s,%s)"%(self.lookup,self.tags) + + def __getitem__(self,key): + try: + item = self.lookup[key] + except KeyError: + item = None + #if self.parent is not None: + #item = self.parent[item] + ## self[key] = item # cache + #if self.verbose: print "%s.get('%s')='%s'"%(self,key,item) + return item + + def __setitem__(self,key,val): + #if self.verbose: print "%s.set('%s','%s')"%(self,key,val) + assert val is not None + self.lookup[key] = val + + def set_tag(self,key,val): + #if self.verbose: print "%s.set_tag(%s,%s)"%(self,key,val) + assert len(key) + self.tags[key] = val + + def deep_get_tag(self,key): + try: + item = self.tags[key] + except KeyError: + item = None + if self.parent is not None: + item = self.parent.deep_get_tag(key) + #if self.verbose: print "%s.get_tag(%s)=%s"%(self,key,item) + return item + + def get_tag(self,key): + try: + item = self.tags[key] + except KeyError: + item = None + #if self.verbose: print "%s.get_tag(%s)=%s"%(self,key,item) + return item + +################################################################### +# +################################################################### +# + + +class ParseError(Exception): + def __init__(self,*e): + self.e = e + + def __str__(self): + return "".join(map(str,self.e)) + + +class Parser(object): + def parse_error(self,lexer,reason="?",*blah): + sys.stderr.write( "%s.parse_error()\n"%self.deepstr() ) + sys.stderr.write( "at line %s: %s\n"%(lexer.lno+1,reason) ) + sys.stderr.write( lexer.err_string() ) + raise ParseError(reason,*blah) + + def expected_error(self,lexer,*l): + self.parse_error( lexer, "expected %s, got '%s'"\ + %(" or ".join(map(repr,l)),lexer.tok)) + + def consume(self,lexer,tok): + if lexer.tok != tok: + self.expected_error(lexer, tok) + lexer.get_token() + + def parse_enter(self,lexer): + #return + self.start_lno=lexer.lno + self.file=lexer.so_file + + def 
parse_leave(self,lexer): + #return + self.lines = lexer.lines[self.start_lno:max(lexer.lno,self.start_lno+1)] + +################################################################### +# +################################################################### +# + diff --git a/tools/python-yasm/pyxelator/work_unit.py b/tools/python-yasm/pyxelator/work_unit.py new file mode 100755 index 0000000..31ab3e5 --- /dev/null +++ b/tools/python-yasm/pyxelator/work_unit.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python + +""" + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. + +version 0.xx + +""" + + +import sys +import os + +import cparse +import ir + +def callcmd(cmd): + try: + from subprocess import call + try: + retcode = call(cmd, shell=True) + assert retcode == 0, "command failed: %s"%cmd + except OSError, e: + assert False, "command failed: %s"%e + except ImportError: + status = os.system( cmd ) + assert status == 0, "command failed: %s"%cmd + +class WorkUnit(object): + def __init__(self, files, modname, filename, + std=False, strip=False, mark_cb=None, + extradefs="", use_header=None, CC="gcc", CPP="gcc -E", + CPPFLAGS=""): + self.files = tuple(files) + self.modname = modname + self.filename = filename + self.CPPFLAGS = CPPFLAGS + self.CPP = CPP + if CC == 'g++': + self.CPPFLAGS += " -D__cplusplus" + self.std = std + self.strip = strip + self.mark_cb = mark_cb + self.node = None + self.extradefs = extradefs + self.CC = CC + self.use_header = use_header + + def mkheader( self ): + if self.use_header: + return self.use_header + tmpname = str(abs(hash( (self.files,self.CPPFLAGS) ))) + name = '.tmp/%s' % tmpname + ifile = open( name+'.h', "w" ) + ifile.write( """ +#define __attribute__(...) +#define __const const +#define __restrict +#define __extension__ +#define __asm__(...) +#define __asm(...) 
+#define __inline__ +#define __inline +""" ) + for filename in self.files: + if self.std: + line = '#include <%s>\n'%filename + else: + line = '#include "%s"\n'%filename + ifile.write( line ) + print line, + ifile.close() + cmd = '%s %s %s > %s'%(self.CPP,name+'.h',self.CPPFLAGS,name+'.E') + sys.stderr.write( "# %s\n" % cmd ) + callcmd( cmd ) + assert open(name+'.E').read().count('\n') > 10, "failed to run preprocessor" + cmd = '%s -dM %s %s > %s'%(self.CPP,name+'.h',self.CPPFLAGS,name+'.dM') + sys.stderr.write( "# %s\n" % cmd ) + callcmd( cmd ) + assert open(name+'.dM').read().count('\n') > 10, "failed to run preprocessor with -dM" + return name + + def parse(self, verbose=False): + sys.stderr.write( "# parse %s\n" % str(self.files) ) + name = self.mkheader() + # read macros + f = open(name+'.dM') + macros = {} + for line in f.readlines(): + if line: + macro = line.split()[1] + if macro.count('('): + macro = macro[:macro.index('(')] + macros[macro] = None + #keys = macros.keys() + #keys.sort() + #for key in keys: + #print key + self.macros = macros + # parse preprocessed code + f = open(name+'.E') + s = f.read() + self.extradefs + self.node = cparse.TransUnit(verbose = verbose) + sys.stderr.write( "# parsing %s lines\n" % s.count('\n') ) + self.node.parse( s ) + if self.strip: + self.node.strip(self.files) + + def transform(self, verbose=False, test_parse=False, test_types=False): + sys.stderr.write( "# processing...\n" ) + self.node = ir.TransUnit( self.node ) + self.node.transform(verbose, test_parse, test_types) + #self.node[0].psource() + if self.mark_cb is not None: + self.node.mark(self.mark_cb,verbose=False) + + def output( self, func_cb = None ): + sys.stderr.write( "# pyxstr...\n" ) + decls = self.node.pyx_decls(self.files, self.modname, macros = self.macros, func_cb = func_cb, names={}, cprefix="" ) + + name = self.filename + assert name.endswith(".pyx") + + pxi = name[:-3]+'pxi' + file = open( pxi, "w" ) + file.write(decls) + sys.stderr.write( "# wrote 
%s, %d lines\n" % (pxi,decls.count('\n')) ) + + def pprint(self): + for decl in self.node: + #decl.psource() + #cstr = decl.cstr() + #cstr = cstr.replace( '\n', '\n# ' ) + print + #print '#', cstr + print decl.deepstr() + +def file_exists(path): + try: + os.stat(path) + return True + except OSError: + return False + +if sys.platform.count('darwin'): + shared_ext = '.dylib' +else: + shared_ext = '.so' + +def get_syms(libs, libdirs): + # XX write interface to objdump -t XX + libnames = [] + for lib in libs: + for ext in shared_ext,'.a': + libname = 'lib'+lib+ext + for libdir in libdirs: + path = libdir+'/'+libname + if file_exists(path): + libnames.append(path) + break + #else: + #print "cannot find %s lib as %s in %s" % ( lib, libname, libdir ) + print 'libnames:', libnames + syms = {} + accept = [ ' %s '%c for c in 'TVWBCDGRS' ] + #f = open('syms.out','w') + for libname in libnames: + try: + from subprocess import Popen, PIPE + p = Popen(['nm', libname], bufsize=1, stdout=PIPE) + fout = p.stdout + except ImportError: + fin, fout = os.popen2( 'nm %s' % libname ) + for line in fout.readlines(): + for acc in accept: + if line.count(acc): + left, right = line.split(acc) + sym = right.strip() + if sys.platform.count('darwin'): + if sym[0] == '_': + sym = sym[1:] # remove underscore prefix + if sym.endswith('.eh'): + sym = sym[:-len('.eh')] + syms[sym] = None + #f.write( '%s: %s %s\n' % (sym,line[:-1],libname) ) + break + return syms + + + diff --git a/tools/python-yasm/pyxelator/wrap_yasm.py b/tools/python-yasm/pyxelator/wrap_yasm.py new file mode 100755 index 0000000..58553ab --- /dev/null +++ b/tools/python-yasm/pyxelator/wrap_yasm.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +""" + +(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com> +Released under GNU LGPL license. 
+ +version 0.xx + +""" + + +import sys +import os + +from work_unit import WorkUnit, get_syms +import ir + + +def mk_tao(CPPFLAGS = "", CPP = "gcc -E", modname = '_yasm', oname = None, YASM_DIR = ".", **options): + if oname is None: + oname = modname+'.pyx' + CPPFLAGS += " -I"+YASM_DIR + CPPFLAGS += " -DYASM_PYXELATOR" + CPPFLAGS += " -DYASM_LIB_INTERNAL" + CPPFLAGS += " -DYASM_BC_INTERNAL" + CPPFLAGS += " -DYASM_EXPR_INTERNAL" + files = [ 'libyasm.h', 'libyasm/assocdat.h', 'libyasm/bitvect.h' ] + + syms = get_syms( ['yasm'], [YASM_DIR] ) + def cb(trans_unit, node, *args): + name, file = node.name, node.file + return True + return name in syms + extradefs = "" + unit = WorkUnit(files,modname,oname,False,mark_cb=cb,extradefs=extradefs, + CPPFLAGS=CPPFLAGS, CPP=CPP, **options) + + + unit.parse( False ) + unit.transform(verbose=False, test_parse=False, test_types=False) + unit.output() + +def main(): + options = {} + for i,arg in enumerate(sys.argv[1:]): + if arg.count('='): + key,val = arg.split('=', 1) + options[key]=val + mk_tao(**options) + +if __name__=="__main__": + main() + + + + diff --git a/tools/python-yasm/setup.py b/tools/python-yasm/setup.py new file mode 100644 index 0000000..d4ce2f4 --- /dev/null +++ b/tools/python-yasm/setup.py @@ -0,0 +1,88 @@ +#! /usr/bin/env python +# Build Python extension with configuration file input +# +# Copyright (C) 2006 Peter Johnson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from distutils.core import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext +from os.path import basename, join, exists + +def ReadSetup(filename): + """ReadSetup goes through filename and parses out the values stored + in the file. 
Values need to be stored in a + \"key=value format\"""" + return dict(line.split('=', 1) for line in open(filename)) + +def ParseCPPFlags(flags): + """parse the CPPFlags macro""" + incl_dir = [x[2:] for x in flags.split() if x.startswith("-I")] + cppflags = [x for x in flags.split() if not x.startswith("-I")] + cppflags.append("-DYASM_LIB_INTERNAL") + cppflags.append("-DYASM_BC_INTERNAL") + cppflags.append("-DYASM_EXPR_INTERNAL") + return (incl_dir, cppflags) + +def ParseSources(src, srcdir): + """parse the Sources macro""" + # do the dance of detecting if the source file is in the current + # directory, and if it's not, prepend srcdir + sources = [] + for tok in src.split(): + if tok.endswith(".c"): + fn = tok + else: + continue + if not exists(fn): + fn = join(srcdir, fn) + sources.append(fn) + + return sources + +def RunSetup(incldir, cppflags, sources): + setup( + name='yasm', + version='0.0', + description='Python bindings for Yasm', + author='Michael Urman, Peter Johnson', + url='http://www.tortall.net/projects/yasm', + ext_modules=[ + Extension('yasm', + sources=sources, + extra_compile_args=cppflags, + include_dirs=incldir, + ), + ], + cmdclass = dict(build_ext=build_ext), + ) + +if __name__ == "__main__": + opts = ReadSetup("python-setup.txt") + incldir, cppflags = ParseCPPFlags(opts["includes"]) + sources = ParseSources(opts["sources"], opts["srcdir"].strip()) + sources.append('yasm_python.c') + if opts["gcc"].strip() == "yes": + cppflags.append('-w') + RunSetup(incldir, cppflags, sources) + diff --git a/tools/python-yasm/symrec.pxi b/tools/python-yasm/symrec.pxi new file mode 100644 index 0000000..eb56ccf --- /dev/null +++ b/tools/python-yasm/symrec.pxi @@ -0,0 +1,285 @@ +# Python bindings for Yasm: Pyrex input file for symrec.h +# +# Copyright (C) 2006 Michael Urman, Peter Johnson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +cdef class Symbol: + cdef yasm_symrec *sym + + def __cinit__(self, symrec): + self.sym = NULL + if PyCObject_Check(symrec): + self.sym = <yasm_symrec *>__get_voidp(symrec, Symbol) + else: + raise NotImplementedError + + # no deref or destroy necessary + + property name: + def __get__(self): return yasm_symrec_get_name(self.sym) + + property status: + def __get__(self): + cdef yasm_sym_status status + s = set() + status = yasm_symrec_get_status(self.sym) + if <int>status & <int>YASM_SYM_USED: s.add('used') + if <int>status & <int>YASM_SYM_DEFINED: s.add('defined') + if <int>status & <int>YASM_SYM_VALUED: s.add('valued') + return s + + property in_table: + def __get__(self): + return bool(<int>yasm_symrec_get_status(self.sym) & + <int>YASM_SYM_NOTINTABLE) + + property visibility: + def __get__(self): + cdef yasm_sym_vis vis + s = set() + vis = yasm_symrec_get_visibility(self.sym) + if <int>vis & <int>YASM_SYM_GLOBAL: s.add('global') + if <int>vis & <int>YASM_SYM_COMMON: s.add('common') + if <int>vis & <int>YASM_SYM_EXTERN: s.add('extern') + if <int>vis & <int>YASM_SYM_DLOCAL: s.add('dlocal') + return s + + property equ: + def __get__(self): + cdef yasm_expr *e + e = yasm_symrec_get_equ(self.sym) + if not e: + raise AttributeError("not an EQU") + return __make_expression(yasm_expr_copy(e)) + + property label: + def __get__(self): + cdef yasm_symrec_get_label_bytecodep bc + if yasm_symrec_get_label(self.sym, &bc): + return None #Bytecode(bc) + else: + raise AttributeError("not a label or not defined") + + property is_special: + def __get__(self): return bool(yasm_symrec_is_special(self.sym)) + + property is_curpos: + def __get__(self): return bool(yasm_symrec_is_curpos(self.sym)) + + def get_data(self): pass # TODO + #return <object>(yasm_symrec_get_data(self.sym, PyYasmAssocData)) + + def set_data(self, data): pass # TODO + #yasm_symrec_set_data(self.sym, PyYasmAssocData, data) + +# +# Use associated data mechanism to keep Symbol reference paired with symrec. 
+# + +cdef void __python_symrec_cb_destroy(void *data): + Py_DECREF(<object>data) +cdef void __python_symrec_cb_print(void *data, FILE *f, int indent_level): + pass +__python_symrec_cb = __assoc_data_callback( + PyCObject_FromVoidPtr(&__python_symrec_cb_destroy, NULL), + PyCObject_FromVoidPtr(&__python_symrec_cb_print, NULL)) + +cdef object __make_symbol(yasm_symrec *symrec): + cdef void *data + __error_check() + data = yasm_symrec_get_data(symrec, + (<__assoc_data_callback>__python_symrec_cb).cb) + if data != NULL: + return <object>data + symbol = Symbol(__pass_voidp(symrec, Symbol)) + yasm_symrec_add_data(symrec, + (<__assoc_data_callback>__python_symrec_cb).cb, + <void *>symbol) + Py_INCREF(symbol) # We're keeping a reference on the C side! + return symbol + +cdef class Bytecode +cdef class SymbolTable + +cdef class SymbolTableKeyIterator: + cdef yasm_symtab_iter *iter + + def __cinit__(self, symtab): + if not isinstance(symtab, SymbolTable): + raise TypeError + self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab) + + def __iter__(self): + return self + + def __next__(self): + if self.iter == NULL: + raise StopIteration + rv = yasm_symrec_get_name(yasm_symtab_iter_value(self.iter)) + self.iter = yasm_symtab_next(self.iter) + return rv + +cdef class SymbolTableValueIterator: + cdef yasm_symtab_iter *iter + + def __cinit__(self, symtab): + if not isinstance(symtab, SymbolTable): + raise TypeError + self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab) + + def __iter__(self): + return self + + def __next__(self): + if self.iter == NULL: + raise StopIteration + rv = __make_symbol(yasm_symtab_iter_value(self.iter)) + self.iter = yasm_symtab_next(self.iter) + return rv + +cdef class SymbolTableItemIterator: + cdef yasm_symtab_iter *iter + + def __cinit__(self, symtab): + if not isinstance(symtab, SymbolTable): + raise TypeError + self.iter = yasm_symtab_first((<SymbolTable>symtab).symtab) + + def __iter__(self): + return self + + def __next__(self): + 
cdef yasm_symrec *sym + if self.iter == NULL: + raise StopIteration + sym = yasm_symtab_iter_value(self.iter) + rv = (yasm_symrec_get_name(sym), __make_symbol(sym)) + self.iter = yasm_symtab_next(self.iter) + return rv + +cdef int __parse_vis(vis) except -1: + if not vis or vis == 'local': return YASM_SYM_LOCAL + if vis == 'global': return YASM_SYM_GLOBAL + if vis == 'common': return YASM_SYM_COMMON + if vis == 'extern': return YASM_SYM_EXTERN + if vis == 'dlocal': return YASM_SYM_DLOCAL + msg = "bad visibility value %r" % vis + PyErr_SetString(ValueError, msg) + return -1 + +cdef class SymbolTable: + cdef yasm_symtab *symtab + + def __cinit__(self): + self.symtab = yasm_symtab_create() + + def __dealloc__(self): + if self.symtab != NULL: yasm_symtab_destroy(self.symtab) + + def use(self, name, line): + return __make_symbol(yasm_symtab_use(self.symtab, name, line)) + + def define_equ(self, name, expr, line): + if not isinstance(expr, Expression): + raise TypeError + return __make_symbol(yasm_symtab_define_equ(self.symtab, name, + yasm_expr_copy((<Expression>expr).expr), line)) + + def define_label(self, name, precbc, in_table, line): + if not isinstance(precbc, Bytecode): + raise TypeError + return __make_symbol(yasm_symtab_define_label(self.symtab, name, + (<Bytecode>precbc).bc, in_table, line)) + + def define_special(self, name, vis): + return __make_symbol( + yasm_symtab_define_special(self.symtab, name, + <yasm_sym_vis>__parse_vis(vis))) + + def declare(self, name, vis, line): + return __make_symbol( + yasm_symtab_declare(self.symtab, name, + <yasm_sym_vis>__parse_vis(vis), line)) + + # + # Methods to make SymbolTable behave like a dictionary of Symbols. 
+ # + + def __getitem__(self, key): + cdef yasm_symrec *symrec + symrec = yasm_symtab_get(self.symtab, key) + if symrec == NULL: + raise KeyError + return __make_symbol(symrec) + + def __contains__(self, key): + cdef yasm_symrec *symrec + symrec = yasm_symtab_get(self.symtab, key) + return symrec != NULL + + def keys(self): + cdef yasm_symtab_iter *iter + l = [] + iter = yasm_symtab_first(self.symtab) + while iter != NULL: + l.append(yasm_symrec_get_name(yasm_symtab_iter_value(iter))) + iter = yasm_symtab_next(iter) + return l + + def values(self): + cdef yasm_symtab_iter *iter + l = [] + iter = yasm_symtab_first(self.symtab) + while iter != NULL: + l.append(__make_symbol(yasm_symtab_iter_value(iter))) + iter = yasm_symtab_next(iter) + return l + + def items(self): + cdef yasm_symtab_iter *iter + cdef yasm_symrec *sym + l = [] + iter = yasm_symtab_first(self.symtab) + while iter != NULL: + sym = yasm_symtab_iter_value(iter) + l.append((yasm_symrec_get_name(sym), __make_symbol(sym))) + iter = yasm_symtab_next(iter) + return l + + def has_key(self, key): + cdef yasm_symrec *symrec + symrec = yasm_symtab_get(self.symtab, key) + return symrec != NULL + + def get(self, key, x): + cdef yasm_symrec *symrec + symrec = yasm_symtab_get(self.symtab, key) + if symrec == NULL: + return x + return __make_symbol(symrec) + + def iterkeys(self): return SymbolTableKeyIterator(self) + def itervalues(self): return SymbolTableValueIterator(self) + def iteritems(self): return SymbolTableItemIterator(self) + def __iter__(self): return SymbolTableKeyIterator(self) + diff --git a/tools/python-yasm/tests/Makefile.inc b/tools/python-yasm/tests/Makefile.inc new file mode 100644 index 0000000..c6df22c --- /dev/null +++ b/tools/python-yasm/tests/Makefile.inc @@ -0,0 +1,13 @@ +EXTRA_DIST += tools/python-yasm/tests/python_test.sh +EXTRA_DIST += tools/python-yasm/tests/__init__.py +EXTRA_DIST += tools/python-yasm/tests/test_bytecode.py +EXTRA_DIST += tools/python-yasm/tests/test_expr.py 
+EXTRA_DIST += tools/python-yasm/tests/test_intnum.py +EXTRA_DIST += tools/python-yasm/tests/test_symrec.py + +if HAVE_PYTHON_BINDINGS + +TESTS_ENVIRONMENT += PYTHON=${PYTHON} +TESTS += tools/python-yasm/tests/python_test.sh + +endif diff --git a/tools/python-yasm/tests/__init__.py b/tools/python-yasm/tests/__init__.py new file mode 100644 index 0000000..f5afb30 --- /dev/null +++ b/tools/python-yasm/tests/__init__.py @@ -0,0 +1,69 @@ +# Test wrapper from Quod Libet +# http://www.sacredchao.net/quodlibet/ +import unittest, sys +suites = [] +add = registerCase = suites.append +from unittest import TestCase + +class Mock(object): + # A generic mocking object. + def __init__(self, **kwargs): self.__dict__.update(kwargs) + +import test_intnum +import test_symrec +import test_bytecode +import test_expr + +class Result(unittest.TestResult): + + separator1 = '=' * 70 + separator2 = '-' * 70 + + def addSuccess(self, test): + unittest.TestResult.addSuccess(self, test) + sys.stdout.write('.') + + def addError(self, test, err): + unittest.TestResult.addError(self, test, err) + sys.stdout.write('E') + + def addFailure(self, test, err): + unittest.TestResult.addFailure(self, test, err) + sys.stdout.write('F') + + def printErrors(self): + succ = self.testsRun - (len(self.errors) + len(self.failures)) + v = "%3d" % succ + count = 50 - self.testsRun + sys.stdout.write((" " * count) + v + "\n") + self.printErrorList('ERROR', self.errors) + self.printErrorList('FAIL', self.failures) + + def printErrorList(self, flavour, errors): + for test, err in errors: + sys.stdout.write(self.separator1 + "\n") + sys.stdout.write("%s: %s\n" % (flavour, str(test))) + sys.stdout.write(self.separator2 + "\n") + sys.stdout.write("%s\n" % err) + +class Runner: + def run(self, test): + suite = unittest.makeSuite(test) + pref = '%s (%d): ' % (test.__name__, len(suite._tests)) + print pref + " " * (25 - len(pref)), + result = Result() + suite(result) + result.printErrors() + return bool(result.failures + 
result.errors) + +def unit(run = []): + runner = Runner() + failures = False + for test in suites: + if not run or test.__name__ in run: + failures |= runner.run(test) + return failures + +if __name__ == "__main__": + raise SystemExit(unit(sys.argv[1:])) + diff --git a/tools/python-yasm/tests/python_test.sh b/tools/python-yasm/tests/python_test.sh new file mode 100755 index 0000000..18e7a69 --- /dev/null +++ b/tools/python-yasm/tests/python_test.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# Based on _sanity.sh from Quod Libet +# http://www.sacredchao.net/quodlibet/ + +set -e + +test -n "${srcdir}" || srcdir=. +test -n "${PYTHON}" || PYTHON=python + +if test "$1" = "--help" -o "$1" = "-h"; then + echo "Usage: $0 --sanity | [TestName] ..." + exit 0 +elif [ "$1" = "--sanity" ]; then + echo "Running static sanity checks." + grep "except None:" ${srcdir}/tools/python-yasm/tests/*.py +else + ${PYTHON} -c "import sys; import glob; sys.path.insert(0, '${srcdir}/tools/python-yasm'); sys.path.insert(0, glob.glob('build/lib.*')[0]); import tests; raise SystemExit(tests.unit('$*'.split()))" +fi + diff --git a/tools/python-yasm/tests/test_bytecode.py b/tools/python-yasm/tests/test_bytecode.py new file mode 100644 index 0000000..eb0e96d --- /dev/null +++ b/tools/python-yasm/tests/test_bytecode.py @@ -0,0 +1,3 @@ +from tests import TestCase, add +from yasm import Bytecode, Expression + diff --git a/tools/python-yasm/tests/test_expr.py b/tools/python-yasm/tests/test_expr.py new file mode 100644 index 0000000..97b021c --- /dev/null +++ b/tools/python-yasm/tests/test_expr.py @@ -0,0 +1,18 @@ +from tests import TestCase, add +from yasm import Expression +import operator + +class TExpression(TestCase): + def test_create(self): + e1 = Expression(operator.add, 1, 2) + e2 = Expression('+', 1, 2) + + self.assertEquals(e1.get_intnum(), e1.get_intnum()) + + def test_extract(self): + e1 = Expression('/', 15, 5) + self.assertEquals(e1.get_intnum(), 3) + self.assertRaises(ValueError, e1.extract_segoff) + 
self.assertRaises(ValueError, e1.extract_wrt) + +add(TExpression) diff --git a/tools/python-yasm/tests/test_intnum.py b/tools/python-yasm/tests/test_intnum.py new file mode 100644 index 0000000..a222018 --- /dev/null +++ b/tools/python-yasm/tests/test_intnum.py @@ -0,0 +1,77 @@ +from tests import TestCase, add +from yasm import IntNum + +class TIntNum(TestCase): + legal_values = [ + 0, 1, -1, 2, -2, 17, -17, + 2**31-1, -2**31, 2**31, 2**32-1, -2**32, + 2**63-1, -2**63-1, 2**63, 2**64, -2**64, + 2**127-1, -2**127 + ] + overflow_values = [ + 2**127, -2**127-1 + ] + + def test_to_from(self): + for i in self.legal_values: + self.assertEquals(i, int(IntNum(i))) + self.assertEquals(i, long(IntNum(i))) + + def test_overflow(self): + for i in self.overflow_values: + self.assertRaises(OverflowError, IntNum, i) + + str_values = [ + "0", "00000", "1234", "87654321", "010101010", "FADCBEEF" + ] + base_values = [2, 8, 10, 12, 16, None, "nasm", "foo"] + + def test_from_str(self): + pass + + def test_from_str_base(self): + pass + + def test_exceptions(self): + self.assertRaises(ZeroDivisionError, IntNum(1).__div__, 0) + + IntNum(1) / 1 # make sure the above error is cleared + + try: IntNum(1) / 0 + except ZeroDivisionError, err: + self.assertEquals('divide by zero', str(err)) + + def test_xor(self): + a = IntNum(-234) + b = IntNum(432) + c = a ^ b + self.assertEquals(a, -234) + self.assertEquals(b, 432) + self.assertEquals(c, -234 ^ 432) + + def test_ixor(self): + a = IntNum(-234) + b = IntNum(432) + a ^= b; b ^= a; a ^= b + self.assertEquals(a, 432) + self.assertEquals(b, -234) + + def test_cmp(self): + a = IntNum(-1) + b = IntNum(0) + c = IntNum(1) + self.assert_(a < b < c) + self.assert_(a <= b <= c) + self.assert_(c >= b >= a) + self.assert_(c > b > a) + self.assert_(a != b != c) + + def test_abs(self): + a = IntNum(-1) + b = IntNum(0) + c = IntNum(1) + + self.assertEquals(abs(a), abs(c)) + self.assertEquals(abs(a) - abs(c), abs(b)) + +add(TIntNum) diff --git 
a/tools/python-yasm/tests/test_symrec.py b/tools/python-yasm/tests/test_symrec.py new file mode 100644 index 0000000..a575b19 --- /dev/null +++ b/tools/python-yasm/tests/test_symrec.py @@ -0,0 +1,80 @@ +from tests import TestCase, add +from yasm import SymbolTable, Expression, YasmError + +class TSymbolTable(TestCase): + def setUp(self): + self.symtab = SymbolTable() + + def test_keys(self): + self.assertEquals(len(self.symtab.keys()), 0) + self.symtab.declare("foo", None, 0) + keys = self.symtab.keys() + self.assertEquals(len(keys), 1) + self.assertEquals(keys[0], "foo") + + def test_contains(self): + self.assert_("foo" not in self.symtab) + self.symtab.declare("foo", None, 0) + self.assert_("foo" in self.symtab) + + def test_exception(self): + expr = Expression('+', 1, 2) + self.symtab.define_equ("foo", expr, 0) + self.assertRaises(YasmError, self.symtab.define_equ, "foo", expr, 0) + self.symtab.define_equ("bar", expr, 0) # cleared + self.assertRaises(YasmError, self.symtab.define_special, "bar", + 'global') + + def test_iters(self): + tab = self.symtab + tab.declare("foo", None, 0) + tab.declare("bar", None, 0) + tab.declare("baz", None, 0) + + # while ordering is not known, it must be consistent + self.assertEquals(list(tab.keys()), list(tab.iterkeys())) + self.assertEquals(list(tab.values()), list(tab.itervalues())) + self.assertEquals(list(tab.items()), list(tab.iteritems())) + self.assertEquals(list(tab.iteritems()), zip(tab.keys(), tab.values())) + +add(TSymbolTable) + +class TSymbolAttr(TestCase): + def setUp(self): + self.symtab = SymbolTable() + self.declsym = self.symtab.declare("foo", None, 0) + + def test_visibility(self): + sym = self.symtab.declare("local1", None, 0) + self.assertEquals(sym.visibility, set()) + sym = self.symtab.declare("local2", '', 0) + self.assertEquals(sym.visibility, set()) + sym = self.symtab.declare("local3", 'local', 0) + self.assertEquals(sym.visibility, set()) + sym = self.symtab.declare("global", 'global', 0) + 
self.assertEquals(sym.visibility, set(['global'])) + sym = self.symtab.declare("common", 'common', 0) + self.assertEquals(sym.visibility, set(['common'])) + sym = self.symtab.declare("extern", 'extern', 0) + self.assertEquals(sym.visibility, set(['extern'])) + sym = self.symtab.declare("dlocal", 'dlocal', 0) + self.assertEquals(sym.visibility, set(['dlocal'])) + + self.assertRaises(ValueError, + lambda: self.symtab.declare("extern2", 'foo', 0)) + def test_name(self): + self.assertEquals(self.declsym.name, "foo") + + def test_equ(self): + self.assertRaises(AttributeError, lambda: self.declsym.equ) + + def test_label(self): + self.assertRaises(AttributeError, lambda: self.declsym.label) + + def test_is_special(self): + self.assertEquals(self.declsym.is_special, False) + + def test_is_curpos(self): + self.assertEquals(self.declsym.is_curpos, False) + +add(TSymbolAttr) diff --git a/tools/python-yasm/value.pxi b/tools/python-yasm/value.pxi new file mode 100644 index 0000000..5d78c05 --- /dev/null +++ b/tools/python-yasm/value.pxi @@ -0,0 +1,56 @@ +# Python bindings for Yasm: Pyrex input file for value.h +# +# Copyright (C) 2006 Michael Urman, Peter Johnson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +cdef class Value: + cdef yasm_value value + def __cinit__(self, value=None, size=None): + cdef unsigned int sz + if size is None: + sz = 0 + else: + sz = size; + + yasm_value_initialize(&self.value, NULL, sz) + if value is None: + pass + elif isinstance(value, Expression): + yasm_value_initialize(&self.value, + yasm_expr_copy((<Expression>value).expr), sz) + elif isinstance(value, Symbol): + yasm_value_init_sym(&self.value, (<Symbol>value).sym, sz) + else: + raise TypeError("Invalid value type '%s'" % type(value)) + + def __dealloc__(self): + yasm_value_delete(&self.value) + + def finalize(self, precbc=None): + if precbc is None: + return yasm_value_finalize(&self.value, NULL) + elif isinstance(precbc, Bytecode): + return yasm_value_finalize(&self.value, (<Bytecode>precbc).bc) + else: + raise TypeError("Invalid precbc type '%s'" % type(precbc)) + diff --git a/tools/python-yasm/yasm.pyx b/tools/python-yasm/yasm.pyx new file mode 100644 index 0000000..adbc734 --- /dev/null +++ b/tools/python-yasm/yasm.pyx @@ -0,0 +1,137 @@ +# Python bindings for Yasm: Main Pyrex input file +# +# Copyright (C) 2006 Michael Urman, Peter Johnson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +"""Interface to the Yasm library. + +The Yasm library (aka libyasm) provides the core functionality of the Yasm +assembler. Classes in this library provide for manipulation of machine +instructions and object file constructs such as symbol tables and sections. + +Expression objects encapsulate complex expressions containing registers, +symbols, and operations such as SEG. + +Bytecode objects encapsulate data or code objects such as data, reserve, +align, or instructions. + +Section objects encapsulate an object file section, including the section +name, any Bytecode objects contained within that section, and other +information. 
+ +""" + +cdef extern from "Python.h": + cdef object PyCObject_FromVoidPtr(void *cobj, void (*destr)(void *)) + cdef object PyCObject_FromVoidPtrAndDesc(void *cobj, void *desc, + void (*destr)(void *, void *)) + cdef int PyType_Check(object) + cdef int PyCObject_Check(object) + cdef void *PyCObject_AsVoidPtr(object) + cdef void *PyCObject_GetDesc(object) + + cdef object _PyLong_FromByteArray(unsigned char *bytes, unsigned int n, + int little_endian, int is_signed) + cdef int _PyLong_AsByteArray(object v, unsigned char *bytes, unsigned int n, + int little_endian, int is_signed) except -1 + + cdef void Py_INCREF(object o) + cdef void Py_DECREF(object o) + + cdef void PyErr_SetString(object type, char *message) + cdef object PyErr_Format(object type, char *format, ...) + +cdef extern from "stdlib.h": + cdef void *malloc(int n) + cdef void free(void *p) + +include "_yasm.pxi" + +cdef object __pass_voidp(void *obj, object forclass): + return PyCObject_FromVoidPtrAndDesc(obj, <void *>forclass, NULL) + +cdef void *__get_voidp(object obj, object forclass) except NULL: + cdef void* desc + + if not PyCObject_Check(obj): + msg = "obj %r is not a CObject" % obj + PyErr_SetString(TypeError, msg) + return NULL + + desc = PyCObject_GetDesc(obj) + + if desc != <void *>forclass: + if desc == NULL: + msg = "CObject type is not set (expecting %s)" % forclass + elif PyType_Check(<object>desc): + msg = "CObject is for %s not %s" % (<object>desc, forclass) + else: + msg = "CObject is incorrect (expecting %s)" % forclass + PyErr_SetString(TypeError, msg) + return NULL + + return PyCObject_AsVoidPtr(obj) + +# +# Link to associated data mechanism to keep Python references paired with +# yasm objects. 
+# +cdef class __assoc_data_callback: + cdef yasm_assoc_data_callback *cb + def __cinit__(self, destroy, print_): + self.cb = <yasm_assoc_data_callback *>malloc(sizeof(yasm_assoc_data_callback)) + self.cb.destroy = <void (*) (void *)>PyCObject_AsVoidPtr(destroy) + #self.cb.print_ = <void (*) (void *, FILE *, int)>PyCObject_AsVoidPtr(print_) + def __dealloc__(self): + free(self.cb) + + +cdef class Register: + cdef unsigned long reg + def __cinit__(self, reg): + self.reg = reg + +include "errwarn.pxi" +include "intnum.pxi" +include "floatnum.pxi" +include "expr.pxi" +include "symrec.pxi" +include "value.pxi" + +include "bytecode.pxi" + +cdef __initialize(): + BitVector_Boot() + yasm_intnum_initialize() + yasm_floatnum_initialize() + yasm_errwarn_initialize() + +def __cleanup(): + yasm_floatnum_cleanup() + yasm_intnum_cleanup() + yasm_errwarn_cleanup() + BitVector_Shutdown() + +__initialize() +import atexit +atexit.register(__cleanup) + diff --git a/tools/re2c/CHANGELOG b/tools/re2c/CHANGELOG new file mode 100644 index 0000000..e3dfd5a --- /dev/null +++ b/tools/re2c/CHANGELOG @@ -0,0 +1,22 @@ +re2c +---- + +YASM version +------------ +- translated to C from C++ for portability reasons + +Version 0.9.1 +------------- + +- removed rcs comments in source files + +Version 0.9 +----------- + +- redistribution based on version 0.5 +- added parentheses to assignment expressions in 'if' statements +- rearranged class members to match initialization order +- substr fix +- use array delete [] when necessary +- other minor fixes for subduing compiler warnings + diff --git a/tools/re2c/Makefile.inc b/tools/re2c/Makefile.inc new file mode 100644 index 0000000..edb89a5 --- /dev/null +++ b/tools/re2c/Makefile.inc @@ -0,0 +1,93 @@ +# These utility programs have to be built for BUILD host in cross-build. 
+# This makes things rather non-standard automake + +noinst_PROGRAMS += re2c + +re2c_SOURCES = +EXTRA_DIST += tools/re2c/main.c +EXTRA_DIST += tools/re2c/basics.h +EXTRA_DIST += tools/re2c/globals.h +EXTRA_DIST += tools/re2c/ins.h +EXTRA_DIST += tools/re2c/re.h +EXTRA_DIST += tools/re2c/token.h +EXTRA_DIST += tools/re2c/code.c +EXTRA_DIST += tools/re2c/dfa.h +EXTRA_DIST += tools/re2c/dfa.c +EXTRA_DIST += tools/re2c/parse.h +EXTRA_DIST += tools/re2c/parser.h +EXTRA_DIST += tools/re2c/parser.c +EXTRA_DIST += tools/re2c/actions.c +EXTRA_DIST += tools/re2c/scanner.h +EXTRA_DIST += tools/re2c/scanner.c +EXTRA_DIST += tools/re2c/mbo_getopt.h +EXTRA_DIST += tools/re2c/mbo_getopt.c +EXTRA_DIST += tools/re2c/substr.h +EXTRA_DIST += tools/re2c/substr.c +EXTRA_DIST += tools/re2c/translate.c +re2c_LDADD = re2c-main.$(OBJEXT) +re2c_LDADD += re2c-code.$(OBJEXT) +re2c_LDADD += re2c-dfa.$(OBJEXT) +re2c_LDADD += re2c-parser.$(OBJEXT) +re2c_LDADD += re2c-actions.$(OBJEXT) +re2c_LDADD += re2c-scanner.$(OBJEXT) +re2c_LDADD += re2c-mbo_getopt.$(OBJEXT) +re2c_LDADD += re2c-substr.$(OBJEXT) +re2c_LDADD += re2c-translate.$(OBJEXT) +re2c_LINK = $(CCLD_FOR_BUILD) -o $@ + +re2c-main.$(OBJEXT): tools/re2c/main.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/main.c || echo '$(srcdir)/'`tools/re2c/main.c + +re2c-code.$(OBJEXT): tools/re2c/code.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/code.c || echo '$(srcdir)/'`tools/re2c/code.c + +re2c-dfa.$(OBJEXT): tools/re2c/dfa.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/dfa.c || echo '$(srcdir)/'`tools/re2c/dfa.c + +re2c-parser.$(OBJEXT): tools/re2c/parser.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/parser.c || echo '$(srcdir)/'`tools/re2c/parser.c + +re2c-actions.$(OBJEXT): tools/re2c/actions.c + $(CC_FOR_BUILD) 
$(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/actions.c || echo '$(srcdir)/'`tools/re2c/actions.c + +re2c-scanner.$(OBJEXT): tools/re2c/scanner.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/scanner.c || echo '$(srcdir)/'`tools/re2c/scanner.c + +re2c-mbo_getopt.$(OBJEXT): tools/re2c/mbo_getopt.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/mbo_getopt.c || echo '$(srcdir)/'`tools/re2c/mbo_getopt.c + +re2c-substr.$(OBJEXT): tools/re2c/substr.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/substr.c || echo '$(srcdir)/'`tools/re2c/substr.c + +re2c-translate.$(OBJEXT): tools/re2c/translate.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(DEFAULT_INCLUDES) $(INCLUDES) \ + -c -o $@ `test -f tools/re2c/translate.c || echo '$(srcdir)/'`tools/re2c/translate.c + +EXTRA_DIST += tools/re2c/CHANGELOG +EXTRA_DIST += tools/re2c/NO_WARRANTY +EXTRA_DIST += tools/re2c/README +EXTRA_DIST += tools/re2c/scanner.re +EXTRA_DIST += tools/re2c/re2c.1 +EXTRA_DIST += tools/re2c/bootstrap/scanner.c +EXTRA_DIST += tools/re2c/doc/loplas.ps.gz +EXTRA_DIST += tools/re2c/doc/sample.bib +EXTRA_DIST += tools/re2c/examples/basemmap.c +EXTRA_DIST += tools/re2c/examples/c.re +EXTRA_DIST += tools/re2c/examples/cmmap.re +EXTRA_DIST += tools/re2c/examples/cnokw.re +EXTRA_DIST += tools/re2c/examples/cunroll.re +EXTRA_DIST += tools/re2c/examples/modula.re +EXTRA_DIST += tools/re2c/examples/repeater.re +EXTRA_DIST += tools/re2c/examples/sample.re +EXTRA_DIST += tools/re2c/examples/simple.re +EXTRA_DIST += tools/re2c/examples/rexx/README +EXTRA_DIST += tools/re2c/examples/rexx/rexx.l +EXTRA_DIST += tools/re2c/examples/rexx/scanio.c diff --git a/tools/re2c/NO_WARRANTY b/tools/re2c/NO_WARRANTY new file mode 100644 index 0000000..885a13d --- /dev/null +++ b/tools/re2c/NO_WARRANTY @@ -0,0 +1,2 @@ +re2c is 
distributed with no warranty whatever. The author and any other +contributors take no responsibility for the consequences of its use. diff --git a/tools/re2c/README b/tools/re2c/README new file mode 100644 index 0000000..943120f --- /dev/null +++ b/tools/re2c/README @@ -0,0 +1,153 @@ +re2c +---- + +Version 0.9.1 +Originally written by Peter Bumbulis (peterr@csg.uwaterloo.ca) +Currently maintained by Brian Young (bayoung@acm.org) + +The re2c distribution can be found at: + + http://www.tildeslash.org/re2c/index.html + +The source distribution is available from: + + http://www.tildeslash.org/re2c/re2c-0.9.1.tar.gz + +This distribution is a cleaned up version of the 0.5 release +maintained by me (Brian Young). Several bugs were fixed, and the +code was cleaned up for warning-free compilation. It has been developed +and tested with egcs 1.0.2 and gcc 2.7.2.3 on Linux x86. Peter +Bumbulis' original release can be found at: + + ftp://csg.uwaterloo.ca/pub/peterr/re2c.0.5.tar.gz + +re2c is a great tool for writing fast and flexible lexers. It has +served many people well for many years and it deserves to be +maintained more actively. re2c is on the order of 2-3 times faster +than a flex based scanner, and its input model is much more +flexible. + +Patches and requests for features will be entertained. Areas of +particular interest to me are porting (a Solaris and an NT +version will be forthcoming) and wide character support. Note +that the code is already quite portable and should be buildable +on any platform with minor makefile changes. + +Peter's original version 0.5 ANNOUNCE and README follow. + +Brian + +-- + +re2c is a tool for generating C-based recognizers from regular +expressions. re2c-based scanners are efficient: for programming +languages, given similar specifications, an re2c-based scanner is +typically almost twice as fast as a flex-based scanner with little or no +increase in size (possibly a decrease on CISC architectures). 
Indeed, +re2c-based scanners are quite competitive with hand-crafted ones. + +Unlike flex, re2c does not generate complete scanners: the user must +supply some interface code. While this code is not bulky (about 50-100 +lines for a flex-like scanner; see the man page and examples in the +distribution) careful coding is required for efficiency (and +correctness). One advantage of this arrangement is that the generated +code is not tied to any particular input model. For example, re2c +generated code can be used to scan data from a null-byte terminated +buffer as illustrated below. + +Given the following source + + #define NULL ((char*) 0) + char *scan(char *p){ + char *q; + #define YYCTYPE char + #define YYCURSOR p + #define YYLIMIT p + #define YYMARKER q + #define YYFILL(n) + /*!re2c + [0-9]+ {return YYCURSOR;} + [\000-\377] {return NULL;} + */ + } + +re2c will generate + + /* Generated by re2c on Sat Apr 16 11:40:58 1994 */ + #line 1 "simple.re" + #define NULL ((char*) 0) + char *scan(char *p){ + char *q; + #define YYCTYPE char + #define YYCURSOR p + #define YYLIMIT p + #define YYMARKER q + #define YYFILL(n) + { + YYCTYPE yych; + unsigned int yyaccept; + goto yy0; + yy1: ++YYCURSOR; + yy0: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '/') goto yy4; + if(yych >= ':') goto yy4; + yy2: yych = *++YYCURSOR; + goto yy7; + yy3: + #line 10 + {return YYCURSOR;} + yy4: yych = *++YYCURSOR; + yy5: + #line 11 + {return NULL;} + yy6: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + yy7: if(yych <= '/') goto yy3; + if(yych <= '9') goto yy6; + goto yy3; + } + #line 12 + + } + +Note that most compilers will perform dead-code elimination to remove +all YYCURSOR, YYLIMIT comparisons. + +re2c was developed for a particular project (constructing a fast REXX +scanner of all things!) and so while it has some rough edges, it should +be quite usable. 
More information about re2c can be found in the +(admittedly skimpy) man page; the algorithms and heuristics used are +described in an upcoming LOPLAS article (included in the distribution). +Probably the best way to find out more about re2c is to try the supplied +examples. re2c is written in C++, and is currently being developed +under Linux using gcc 2.5.8. + +Peter + +-- + +re2c is distributed with no warranty whatever. The code is certain to +contain errors. Neither the author nor any contributor takes +responsibility for any consequences of its use. + +re2c is in the public domain. The data structures and algorithms used +in re2c are all either taken from documents available to the general +public or are inventions of the author. Programs generated by re2c may +be distributed freely. re2c itself may be distributed freely, in source +or binary, unchanged or modified. Distributors may charge whatever fees +they can obtain for re2c. + +If you do make use of re2c, or incorporate it into a larger project, an +acknowledgement somewhere (documentation, research report, etc.) would +be appreciated. + +Please send bug reports and feedback (including suggestions for +improving the distribution) to + + peterr@csg.uwaterloo.ca + +Include a small example and the banner from parser.y with bug reports. 
+
diff --git a/tools/re2c/actions.c b/tools/re2c/actions.c
new file mode 100644
index 0000000..3eaade0
--- /dev/null
+++ b/tools/re2c/actions.c
@@ -0,0 +1,717 @@
+#include <time.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "tools/re2c/globals.h"
+#include "tools/re2c/parse.h"
+#include "tools/re2c/dfa.h"
+
+/* Head of the symbol list; Symbol_init() pushes, Symbol_find() searches. */
+static Symbol *first = NULL;
+
+/*
+ * Initialize r: set its name from str, clear its regexp, and link it in at
+ * the head of the symbol list.
+ */
+void
+Symbol_init(Symbol *r, const SubStr *str)
+{
+    r->next = first;
+    Str_init(&r->name, str);
+    r->re = NULL;
+    first = r;
+}
+
+/*
+ * Return the listed symbol whose name equals str; when there is none, return
+ * whatever Symbol_new(str) yields.
+ */
+Symbol *
+Symbol_find(const SubStr *str)
+{
+    Symbol *sym;
+    for(sym = first; sym; sym = sym->next)
+        if(SubStr_eq(&sym->name, str)) return sym;
+    return Symbol_new(str);
+}
+
+/*
+void showIns(FILE *o, const Ins *i, const Ins *base){
+    o.width(3);
+    o << &i - &base << ": ";
+    switch(i.i.tag){
+    case CHAR: {
+        o << "match ";
+        for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j)
+            prtCh(o, j->c.value);
+        break;
+    } case GOTO:
+        o << "goto " << ((Ins*) i.i.link - &base);
+        break;
+    case FORK:
+        o << "fork " << ((Ins*) i.i.link - &base);
+        break;
+    case CTXT:
+        o << "term " << ((RuleOp*) i.i.link)->accept;
+        break;
+    case TERM:
+        o << "term " << ((RuleOp*) i.i.link)->accept;
+        break;
+    }
+    o << "\n";
+}
+*/
+
+/*
+ * Fixed match length of an alternation: both branches must have the same
+ * fixed length, otherwise the result is ~0u (no fixed length).
+ */
+static unsigned int
+AltOp_fixedLength(RegExp *r)
+{
+    unsigned int l1 = RegExp_fixedLength(r->d.AltCatOp.exp1);
+    /* An alternation is fixed-length only if exp2 agrees with exp1. */
+    unsigned int l2 = RegExp_fixedLength(r->d.AltCatOp.exp2);
+    if(l1 != l2 || l1 == ~0u)
+        return ~0u;
+    return l1;
+}
+
+/*
+ * Fixed match length of a concatenation: the sum of both operands' fixed
+ * lengths, or ~0u if either operand has no fixed length.
+ */
+static unsigned int
+CatOp_fixedLength(RegExp *r)
+{
+    unsigned int l1, l2;
+    if((l1 = RegExp_fixedLength(r->d.AltCatOp.exp1)) != ~0u )
+        if((l2 = RegExp_fixedLength(r->d.AltCatOp.exp2)) != ~0u)
+            return l1+l2;
+    return ~0u;
+}
+
+/*
+ * Return the fixed match length of r, or ~0u when it has none.  NULLOP is 0,
+ * MATCHOP is 1, ALTOP/CATOP defer to the helpers; every other type is ~0u.
+ */
+unsigned int
+RegExp_fixedLength(RegExp *r)
+{
+    switch (r->type) {
+        case NULLOP:
+            return 0;
+        case MATCHOP:
+            return 1;
+        case ALTOP:
+            return AltOp_fixedLength(r);
+        case CATOP:
+            return CatOp_fixedLength(r);
+        default:
+            return ~0u;
+    }
+    return ~0u;
+}
+
+/*
+ * Recursively fill in re->size for re and its sub-expressions.  A MATCHOP
+ * counts 1 plus one per character c in its ranges with rep[c] == c.
+ */
+void
+RegExp_calcSize(RegExp *re, Char *rep)
+{
+    Range *r;
+    unsigned int c;
+
+    switch (re->type) {
+        case NULLOP:
+            re->size = 0;
+            break;
+        case MATCHOP:
+            re->size = 1;
+            for(r = re->d.match; r; r = r->next)
+                for(c = r->lb; c < r->ub; ++c)
+                    if(rep[c] == c)
+                        ++re->size;
+            break;
+        case RULEOP:
+            RegExp_calcSize(re->d.RuleOp.exp, rep);
+            RegExp_calcSize(re->d.RuleOp.ctx, rep);
+            re->size = re->d.RuleOp.exp->size + re->d.RuleOp.ctx->size + 1;
+            break;
+        case ALTOP:
+            RegExp_calcSize(re->d.AltCatOp.exp1, rep);
+            RegExp_calcSize(re->d.AltCatOp.exp2, rep);
+            re->size = re->d.AltCatOp.exp1->size +
+                       re->d.AltCatOp.exp2->size + 2;
+            break;
+        case CATOP:
+            RegExp_calcSize(re->d.AltCatOp.exp1, rep);
+            RegExp_calcSize(re->d.AltCatOp.exp2, rep);
+            re->size = re->d.AltCatOp.exp1->size + re->d.AltCatOp.exp2->size;
+            break;
+        case CLOSEOP:
+            RegExp_calcSize(re->d.exp, rep);
+            re->size = re->d.exp->size + 1;
+            break;
+        case CLOSEVOP:
+            RegExp_calcSize(re->d.CloseVOp.exp, rep);
+
+            if (re->d.CloseVOp.max >= 0)
+                re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) +
+                           ((1 + re->d.CloseVOp.exp->size) *
+                            (re->d.CloseVOp.max - re->d.CloseVOp.min));
+            else
+                re->size = (re->d.CloseVOp.exp->size * re->d.CloseVOp.min) + 1;
+            break;
+    }
+}
+
+static void
+MatchOp_compile(RegExp *re, Char *rep, Ins *i)
+{
+    Ins *j;
+    unsigned int bump;
+    Range *r;
+    unsigned int c;
+
+
i->i.tag = CHAR; + i->i.link = &i[re->size]; + j = &i[1]; + bump = re->size; + for(r = re->d.match; r; r = r->next){ + for(c = r->lb; c < r->ub; ++c){ + if(rep[c] == c){ + j->c.value = c; + j->c.bump = --bump; + j++; + } + } + } +} + +static void +AltOp_compile(RegExp *re, Char *rep, Ins *i){ + Ins *j; + + i->i.tag = FORK; + j = &i[re->d.AltCatOp.exp1->size + 1]; + i->i.link = &j[1]; + RegExp_compile(re->d.AltCatOp.exp1, rep, &i[1]); + j->i.tag = GOTO; + j->i.link = &j[re->d.AltCatOp.exp2->size + 1]; + RegExp_compile(re->d.AltCatOp.exp2, rep, &j[1]); +} + +void +RegExp_compile(RegExp *re, Char *rep, Ins *i) +{ + Ins *jumppoint; + int st = 0; + + switch (re->type) { + case NULLOP: + break; + case MATCHOP: + MatchOp_compile(re, rep, i); + break; + case RULEOP: + re->d.RuleOp.ins = i; + RegExp_compile(re->d.RuleOp.exp, rep, &i[0]); + i += re->d.RuleOp.exp->size; + RegExp_compile(re->d.RuleOp.ctx, rep, &i[0]); + i += re->d.RuleOp.ctx->size; + i->i.tag = TERM; + i->i.link = re; + break; + case ALTOP: + AltOp_compile(re, rep, i); + break; + case CATOP: + RegExp_compile(re->d.AltCatOp.exp1, rep, &i[0]); + RegExp_compile(re->d.AltCatOp.exp2, rep, + &i[re->d.AltCatOp.exp1->size]); + break; + case CLOSEOP: + RegExp_compile(re->d.exp, rep, &i[0]); + i += re->d.exp->size; + i->i.tag = FORK; + i->i.link = i - re->d.exp->size; + break; + case CLOSEVOP: + jumppoint = i + ((1 + re->d.CloseVOp.exp->size) * + (re->d.CloseVOp.max - re->d.CloseVOp.min)); + for(st = re->d.CloseVOp.min; st < re->d.CloseVOp.max; st++) { + i->i.tag = FORK; + i->i.link = jumppoint; + i+=1; + RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]); + i += re->d.CloseVOp.exp->size; + } + for(st = 0; st < re->d.CloseVOp.min; st++) { + RegExp_compile(re->d.CloseVOp.exp, rep, &i[0]); + i += re->d.CloseVOp.exp->size; + if(re->d.CloseVOp.max < 0 && st == 0) { + i->i.tag = FORK; + i->i.link = i - re->d.CloseVOp.exp->size; + i++; + } + } + break; + } +} + +static void +MatchOp_split(RegExp *re, CharSet *s) +{ + Range *r; + 
unsigned int c; + + for(r = re->d.match; r; r = r->next){ + for(c = r->lb; c < r->ub; ++c){ + CharPtn *x = s->rep[c], *a = x->nxt; + if(!a){ + if(x->card == 1) + continue; + x->nxt = a = s->freeHead; + if(!(s->freeHead = s->freeHead->nxt)) + s->freeTail = &s->freeHead; + a->nxt = NULL; + x->fix = s->fix; + s->fix = x; + } + if(--(x->card) == 0){ + *s->freeTail = x; + *(s->freeTail = &x->nxt) = NULL; + } + s->rep[c] = a; + ++(a->card); + } + } + for(; s->fix; s->fix = s->fix->fix) + if(s->fix->card) + s->fix->nxt = NULL; +} + +void +RegExp_split(RegExp *re, CharSet *s) +{ + switch (re->type) { + case NULLOP: + break; + case MATCHOP: + MatchOp_split(re, s); + break; + case RULEOP: + RegExp_split(re->d.RuleOp.exp, s); + RegExp_split(re->d.RuleOp.ctx, s); + break; + case ALTOP: + /* FALLTHROUGH */ + case CATOP: + RegExp_split(re->d.AltCatOp.exp1, s); + RegExp_split(re->d.AltCatOp.exp2, s); + break; + case CLOSEOP: + RegExp_split(re->d.exp, s); + break; + case CLOSEVOP: + RegExp_split(re->d.CloseVOp.exp, s); + break; + } +} + +void +RegExp_display(RegExp *re, FILE *o) +{ + switch (re->type) { + case NULLOP: + fputc('_', o); + break; + case MATCHOP: + Range_out(o, re->d.match); + break; + case RULEOP: + RegExp_display(re->d.RuleOp.exp, o); + fputc('/', o); + RegExp_display(re->d.RuleOp.ctx, o); + fputc(';', o); + break; + case ALTOP: + RegExp_display(re->d.AltCatOp.exp1, o); + fputc('|', o); + RegExp_display(re->d.AltCatOp.exp2, o); + break; + case CATOP: + RegExp_display(re->d.AltCatOp.exp1, o); + RegExp_display(re->d.AltCatOp.exp2, o); + break; + case CLOSEOP: + RegExp_display(re->d.exp, o); + fputc('+', o); + break; + } +} + +void +Range_out(FILE *o, const Range *r) +{ + if(!r) + return; + + if((r->ub - r->lb) == 1){ + prtCh(o, r->lb); + } else { + prtCh(o, r->lb); + fputc('-', o); + prtCh(o, r->ub-1); + } + Range_out(o, r->next); +} + +static Range *doUnion(Range *r1, Range *r2){ + Range *r, **rP = &r; + for(;;){ + Range *s; + if(r1->lb <= r2->lb){ + s = 
Range_new_copy(r1); + } else { + s = Range_new_copy(r2); + } + *rP = s; + rP = &s->next; + for(;;){ + if(r1->lb <= r2->lb){ + if(r1->lb > s->ub) + break; + if(r1->ub > s->ub) + s->ub = r1->ub; + if(!(r1 = r1->next)){ + unsigned int ub = 0; + for(; r2 && r2->lb <= s->ub; r2 = r2->next) + ub = r2->ub; + if(ub > s->ub) + s->ub = ub; + *rP = r2; + return r; + } + } else { + if(r2->lb > s->ub) + break; + if(r2->ub > s->ub) + s->ub = r2->ub; + if(!(r2 = r2->next)){ + unsigned int ub = 0; + for(; r1 && r1->lb <= s->ub; r1 = r1->next) + ub = r1->ub; + if(ub > s->ub) + s->ub = ub; + *rP = r1; + return r; + } + } + } + } + *rP = NULL; + return r; +} + +static Range *doDiff(Range *r1, Range *r2){ + Range *r, *s, **rP = &r; + for(; r1; r1 = r1->next){ + unsigned int lb = r1->lb; + for(; r2 && r2->ub <= r1->lb; r2 = r2->next); + for(; r2 && r2->lb < r1->ub; r2 = r2->next){ + if(lb < r2->lb){ + *rP = s = Range_new(lb, r2->lb); + rP = &s->next; + } + if((lb = r2->ub) >= r1->ub) + goto noMore; + } + *rP = s = Range_new(lb, r1->ub); + rP = &s->next; + noMore:; + } + *rP = NULL; + return r; +} + +static RegExp *merge(RegExp *m1, RegExp *m2){ + if(!m1) + return m2; + if(!m2) + return m1; + return RegExp_new_MatchOp(doUnion(m1->d.match, m2->d.match)); +} + +RegExp *mkDiff(RegExp *e1, RegExp *e2){ + RegExp *m1, *m2; + Range *r; + if(!(m1 = RegExp_isA(e1, MATCHOP))) + return NULL; + if(!(m2 = RegExp_isA(e2, MATCHOP))) + return NULL; + r = doDiff(m1->d.match, m2->d.match); + return r? 
RegExp_new_MatchOp(r) : RegExp_new_NullOp(); +} + +static RegExp *doAlt(RegExp *e1, RegExp *e2){ + if(!e1) + return e2; + if(!e2) + return e1; + return RegExp_new_AltOp(e1, e2); +} + +RegExp *mkAlt(RegExp *e1, RegExp *e2){ + RegExp *a; + RegExp *m1, *m2; + if((a = RegExp_isA(e1, ALTOP))){ + if((m1 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP))) + e1 = a->d.AltCatOp.exp2; + } else if((m1 = RegExp_isA(e1, MATCHOP))){ + e1 = NULL; + } + if((a = RegExp_isA(e2, ALTOP))){ + if((m2 = RegExp_isA(a->d.AltCatOp.exp1, MATCHOP))) + e2 = a->d.AltCatOp.exp2; + } else if((m2 = RegExp_isA(e2, MATCHOP))){ + e2 = NULL; + } + return doAlt(merge(m1, m2), doAlt(e1, e2)); +} + +static unsigned char unescape(SubStr *s){ + unsigned char c; + unsigned char v; + s->len--; + if((c = *s->str++) != '\\' || s->len == 0) + return xlat[c]; + s->len--; + switch(c = *s->str++){ + case 'n': + return xlat['\n']; + case 't': + return xlat['\t']; + case 'v': + return xlat['\v']; + case 'b': + return xlat['\b']; + case 'r': + return xlat['\r']; + case 'f': + return xlat['\f']; + case 'a': + return xlat['\a']; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + v = c - '0'; + for(; s->len != 0 && '0' <= (c = *s->str) && c <= '7'; s->len--, s->str++) + v = v*8 + (c - '0'); + return v; + } default: + return xlat[c]; + } +} + +static Range *getRange(SubStr *s){ + unsigned char lb = unescape(s), ub; + if(s->len < 2 || *s->str != '-'){ + ub = lb; + } else { + s->len--; s->str++; + ub = unescape(s); + if(ub < lb){ + unsigned char tmp; + tmp = lb; lb = ub; ub = tmp; + } + } + return Range_new(lb, ub+1); +} + +static RegExp *matchChar(unsigned int c){ + return RegExp_new_MatchOp(Range_new(c, c+1)); +} + +RegExp *strToRE(SubStr s){ + RegExp *re; + s.len -= 2; s.str += 1; + if(s.len == 0) + return RegExp_new_NullOp(); + re = matchChar(unescape(&s)); + while(s.len > 0) + re = RegExp_new_CatOp(re, matchChar(unescape(&s))); + return re; +} + +RegExp *strToCaseInsensitiveRE(SubStr s){ + 
unsigned char c; + RegExp *re, *reL, *reU; + s.len -= 2; s.str += 1; + if(s.len == 0) + return RegExp_new_NullOp(); + c = unescape(&s); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); + re = mkAlt(reL, reU); + } else { + re = matchChar(c); + } + while(s.len > 0) { + c = unescape(&s); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); + re = RegExp_new_CatOp(re, mkAlt(reL, reU)); + } else { + re = RegExp_new_CatOp(re, matchChar(c)); + } + } + return re; +} + +RegExp *ranToRE(SubStr s){ + Range *r; + s.len -= 2; s.str += 1; + if(s.len == 0) + return RegExp_new_NullOp(); + r = getRange(&s); + while(s.len > 0) + r = doUnion(r, getRange(&s)); + return RegExp_new_MatchOp(r); +} + +RegExp *invToRE(SubStr s) +{ + RegExp *any, *ran, *inv; + SubStr *ss; + + + s.len--; + s.str++; + + ss = SubStr_new("[\\000-\\377]", strlen("[\\000-\\377]")); + any = ranToRE(*ss); + free(ss); + if (s.len <= 2) + return any; + + ran = ranToRE(s); + inv = mkDiff(any, ran); + + free(ran); + free(any); + + return inv; +} + +RegExp *mkDot() +{ + SubStr *ss = SubStr_new("[\\000-\\377]", strlen("[\\000-\\377]")); + RegExp * any = ranToRE(*ss); + RegExp * ran = matchChar('\n'); + RegExp * inv = mkDiff(any, ran); + + free(ss); + free(ran); + free(any); + + return inv; +} + +RegExp * +RegExp_new_RuleOp(RegExp *e, RegExp *c, Token *t, unsigned int a) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = RULEOP; + r->d.RuleOp.exp = e; + r->d.RuleOp.ctx = c; + r->d.RuleOp.ins = NULL; + r->d.RuleOp.accept = a; + r->d.RuleOp.code = t; + return r; +} + +static void optimize(Ins *i){ + while(!isMarked(i)){ + mark(i); + if(i->i.tag == CHAR){ + i = (Ins*) i->i.link; + } else if(i->i.tag == GOTO || i->i.tag == FORK){ + Ins *target = (Ins*) i->i.link; + optimize(target); + if(target->i.tag == GOTO) + i->i.link = target->i.link == target? 
i : target; + if(i->i.tag == FORK){ + Ins *follow = (Ins*) &i[1]; + optimize(follow); + if(follow->i.tag == GOTO && follow->i.link == follow){ + i->i.tag = GOTO; + } else if(i->i.link == i){ + i->i.tag = GOTO; + i->i.link = follow; + } + } + return; + } else { + ++i; + } + } +} + +void genCode(FILE *o, RegExp *re){ + CharSet cs; + unsigned int j; + Char rep[nChars]; + Ins *ins, *eoi; + DFA *dfa; + + memset(&cs, 0, sizeof(cs)); + for(j = 0; j < nChars; ++j){ + cs.rep[j] = &cs.ptn[0]; + cs.ptn[j].nxt = &cs.ptn[j+1]; + } + cs.freeHead = &cs.ptn[1]; + *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL; + cs.ptn[0].card = nChars; + cs.ptn[0].nxt = NULL; + RegExp_split(re, &cs); +/* + for(unsigned int k = 0; k < nChars;){ + for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];); + printSpan(cerr, j, k); + cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; + } +*/ + for(j = 0; j < nChars; ++j){ + if(!cs.rep[j]->nxt) + cs.rep[j]->nxt = &cs.ptn[j]; + rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); + } + + RegExp_calcSize(re, rep); + ins = malloc(sizeof(Ins)*(re->size+1)); + memset(ins, 0, (re->size+1)*sizeof(Ins)); + RegExp_compile(re, rep, ins); + eoi = &ins[re->size]; + eoi->i.tag = GOTO; + eoi->i.link = eoi; + + optimize(ins); + for(j = 0; j < re->size;){ + unmark(&ins[j]); + if(ins[j].i.tag == CHAR){ + j = (Ins*) ins[j].i.link - ins; + } else { + j++; + } + } + + dfa = DFA_new(ins, re->size, 0, 256, rep); + DFA_emit(dfa, o); + DFA_delete(dfa); + free(ins); +} diff --git a/tools/re2c/basics.h b/tools/re2c/basics.h new file mode 100644 index 0000000..1531e75 --- /dev/null +++ b/tools/re2c/basics.h @@ -0,0 +1,14 @@ +#ifndef re2c_basics_h +#define re2c_basics_h + +#if defined(__GNUC__) && !defined(inline) +#define inline __inline__ +#endif + +typedef unsigned char byte; +typedef unsigned short word; +typedef unsigned long dword; + +#define PACKAGE_VERSION "1.0.0" + +#endif diff --git a/tools/re2c/bootstrap/scanner.c b/tools/re2c/bootstrap/scanner.c new file mode 100644 index 
0000000..fd0ca93 --- /dev/null +++ b/tools/re2c/bootstrap/scanner.c @@ -0,0 +1,748 @@ +/* Generated by re2c 0.9.1-C on Sun Oct 9 22:15:58 2005 + */ +#line 1 "scanner.re" +#include <stdlib.h> +#include <string.h> +#include "tools/re2c/scanner.h" +#include "tools/re2c/parse.h" +#include "tools/re2c/globals.h" +#include "re2c-parser.h" + +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + +#define BSIZE 8192 + +#define YYCTYPE unsigned char +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +static unsigned char *fill(Scanner*, unsigned char*); + +void +Scanner_init(Scanner *s, FILE *i) +{ + s->in = i; + s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top = + s->eof = NULL; + s->tchar = s->tline = 0; + s->cline = 1; +} + +static unsigned char * +fill(Scanner *s, unsigned char *cursor) +{ + if(!s->eof){ + unsigned int cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + if (s->bot) + free(s->bot); + s->bot = buf; + } + if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){ + s->eof = &s->lim[cnt]; *s->eof++ = '\0'; + } + s->lim += cnt; + } + return cursor; +} + +#line 79 "scanner.re" + + +int +Scanner_echo(Scanner *s, FILE *out) +{ + unsigned char *cursor = s->cur; + int ignore_eoc = 0; + + /* Catch EOF */ + if (s->eof && cursor == s->eof) + return 0; + + s->tok = cursor; +echo: + +#line 87 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy0; + ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 11) 
YYFILL(11); + yych = *YYCURSOR; + if(yych <= ')'){ + if(yych <= '\000') goto yy7; + if(yych == '\n') goto yy5; + goto yy9; + } else { + if(yych <= '*') goto yy4; + if(yych != '/') goto yy9; + goto yy2; + } +yy2: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '*') goto yy12; + goto yy3; +yy3: +#line 117 "scanner.re" +{ goto echo; } +#line 112 "scanner.c" +yy4: yych = *++YYCURSOR; + if(yych == '/') goto yy10; + goto yy3; +yy5: yych = *++YYCURSOR; + goto yy6; +yy6: +#line 112 "scanner.re" +{ fwrite(s->tok, 1, cursor - s->tok, out); + s->tok = s->pos = cursor; s->cline++; oline++; + goto echo; } +#line 123 "scanner.c" +yy7: yych = *++YYCURSOR; + goto yy8; +yy8: +#line 115 "scanner.re" +{ fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */ + if(cursor == s->eof) { RETURN(0); } } +#line 130 "scanner.c" +yy9: yych = *++YYCURSOR; + goto yy3; +yy10: yych = *++YYCURSOR; + goto yy11; +yy11: +#line 103 "scanner.re" +{ + if (ignore_eoc) { + ignore_eoc = 0; + } else { + fwrite(s->tok, 1, cursor - s->tok, out); + } + s->tok = s->pos = cursor; + goto echo; + } +#line 146 "scanner.c" +yy12: yych = *++YYCURSOR; + if(yych == '!') goto yy14; + goto yy13; +yy13: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy3; + } +yy14: yych = *++YYCURSOR; + if(yych == 'm') goto yy15; + if(yych == 'r') goto yy16; + goto yy13; +yy15: yych = *++YYCURSOR; + if(yych == 'a') goto yy21; + goto yy13; +yy16: yych = *++YYCURSOR; + if(yych != 'e') goto yy13; + goto yy17; +yy17: yych = *++YYCURSOR; + if(yych != '2') goto yy13; + goto yy18; +yy18: yych = *++YYCURSOR; + if(yych != 'c') goto yy13; + goto yy19; +yy19: yych = *++YYCURSOR; + goto yy20; +yy20: +#line 94 "scanner.re" +{ fwrite(s->tok, 1, &cursor[-7] - s->tok, out); + s->tok = cursor; + RETURN(1); } +#line 177 "scanner.c" +yy21: yych = *++YYCURSOR; + if(yych != 'x') goto yy13; + goto yy22; +yy22: yych = *++YYCURSOR; + if(yych != ':') goto yy13; + goto yy23; +yy23: yych = *++YYCURSOR; + if(yych != 
'r') goto yy13; + goto yy24; +yy24: yych = *++YYCURSOR; + if(yych != 'e') goto yy13; + goto yy25; +yy25: yych = *++YYCURSOR; + if(yych != '2') goto yy13; + goto yy26; +yy26: yych = *++YYCURSOR; + if(yych != 'c') goto yy13; + goto yy27; +yy27: yych = *++YYCURSOR; + goto yy28; +yy28: +#line 97 "scanner.re" +{ + fprintf(out, "#define YYMAXFILL %u\n", maxFill); + s->tok = s->pos = cursor; + ignore_eoc = 1; + goto echo; + } +#line 206 "scanner.c" +} +#line 118 "scanner.re" + +} + + +int +Scanner_scan(Scanner *s) +{ + unsigned char *cursor = s->cur; + unsigned int depth; + +scan: + s->tchar = cursor - s->pos; + s->tline = s->cline; + s->tok = cursor; + +#line 224 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy29; + ++YYCURSOR; +yy29: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '/'){ + if(yych <= '"'){ + if(yych <= '\n'){ + if(yych <= '\b') goto yy53; + if(yych <= '\t') goto yy47; + goto yy49; + } else { + if(yych == ' ') goto yy47; + if(yych <= '!') goto yy53; + goto yy37; + } + } else { + if(yych <= '*'){ + if(yych <= '&') goto yy53; + if(yych <= '\'') goto yy39; + if(yych <= ')') goto yy43; + goto yy35; + } else { + if(yych <= '+') goto yy44; + if(yych <= '-') goto yy53; + if(yych <= '.') goto yy51; + goto yy33; + } + } + } else { + if(yych <= '@'){ + if(yych <= '<'){ + if(yych == ';') goto yy43; + goto yy53; + } else { + if(yych <= '=') goto yy43; + if(yych == '?') goto yy44; + goto yy53; + } + } else { + if(yych <= '`'){ + if(yych <= 'Z') goto yy45; + if(yych <= '[') goto yy41; + if(yych <= '\\') goto yy43; + goto yy53; + } else { + if(yych <= 'z') goto yy45; + if(yych <= '{') goto yy31; + if(yych <= '|') goto yy43; + goto yy53; + } + } + } +yy31: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych <= '/') goto yy32; + if(yych <= '9') goto yy84; + goto yy32; +yy32: +#line 133 "scanner.re" +{ depth = 1; + goto code; + } +#line 291 "scanner.c" +yy33: yych = *++YYCURSOR; + if(yych == '*') goto yy82; + goto yy34; 
+yy34: +#line 163 "scanner.re" +{ RETURN(*s->tok); } +#line 298 "scanner.c" +yy35: yych = *++YYCURSOR; + if(yych == '/') goto yy80; + goto yy36; +yy36: +#line 165 "scanner.re" +{ yylval.op = *s->tok; + RETURN(CLOSE); } +#line 306 "scanner.c" +yy37: yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if(yych != '\n') goto yy76; + goto yy38; +yy38: +#line 150 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing \")"); } +#line 314 "scanner.c" +yy39: yyaccept = 2; + yych = *(YYMARKER = ++YYCURSOR); + if(yych != '\n') goto yy71; + goto yy40; +yy40: +#line 151 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing ')"); } +#line 322 "scanner.c" +yy41: yyaccept = 3; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy42; + if(yych == '^') goto yy62; + goto yy60; +yy42: +#line 161 "scanner.re" +{ Scanner_fatal(s, "unterminated range (missing ])"); } +#line 331 "scanner.c" +yy43: yych = *++YYCURSOR; + goto yy34; +yy44: yych = *++YYCURSOR; + goto yy36; +yy45: yych = *++YYCURSOR; + goto yy58; +yy46: +#line 180 "scanner.re" +{ SubStr substr; + s->cur = cursor; + substr = Scanner_token(s); + yylval.symbol = Symbol_find(&substr); + return ID; } +#line 345 "scanner.c" +yy47: yych = *++YYCURSOR; + goto yy56; +yy48: +#line 186 "scanner.re" +{ goto scan; } +#line 351 "scanner.c" +yy49: yych = *++YYCURSOR; + goto yy50; +yy50: +#line 188 "scanner.re" +{ if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->cline++; + goto scan; + } +#line 360 "scanner.c" +yy51: yych = *++YYCURSOR; + goto yy52; +yy52: +#line 193 "scanner.re" +{ s->cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } +#line 369 "scanner.c" +yy53: yych = *++YYCURSOR; + goto yy54; +yy54: +#line 198 "scanner.re" +{ fprintf(stderr, "unexpected character: '%c'\n", *s->tok); + goto scan; + } +#line 377 "scanner.c" +yy55: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy56; +yy56: if(yych == '\t') goto yy55; + if(yych == ' ') goto yy55; + goto 
yy48; +yy57: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy58; +yy58: if(yych <= '@'){ + if(yych <= '/') goto yy46; + if(yych <= '9') goto yy57; + goto yy46; + } else { + if(yych <= 'Z') goto yy57; + if(yych <= '`') goto yy46; + if(yych <= 'z') goto yy57; + goto yy46; + } +yy59: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy60; +yy60: if(yych <= '['){ + if(yych != '\n') goto yy59; + goto yy61; + } else { + if(yych <= '\\') goto yy64; + if(yych <= ']') goto yy65; + goto yy59; + } +yy61: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy32; + case 1: goto yy38; + case 2: goto yy40; + case 3: goto yy42; + } +yy62: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy63; +yy63: if(yych <= '['){ + if(yych == '\n') goto yy61; + goto yy62; + } else { + if(yych <= '\\') goto yy67; + if(yych <= ']') goto yy68; + goto yy62; + } +yy64: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy59; +yy65: yych = *++YYCURSOR; + goto yy66; +yy66: +#line 157 "scanner.re" +{ s->cur = cursor; + yylval.regexp = ranToRE(Scanner_token(s)); + return RANGE; } +#line 442 "scanner.c" +yy67: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy62; +yy68: yych = *++YYCURSOR; + goto yy69; +yy69: +#line 153 "scanner.re" +{ s->cur = cursor; + yylval.regexp = invToRE(Scanner_token(s)); + return RANGE; } +#line 455 "scanner.c" +yy70: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy71; +yy71: if(yych <= '&'){ + if(yych == '\n') goto yy61; + goto yy70; + } else { + if(yych <= '\'') goto yy73; + if(yych != '\\') goto yy70; + goto yy72; + } +yy72: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy70; +yy73: yych = *++YYCURSOR; + goto yy74; +yy74: +#line 146 "scanner.re" +{ s->cur = cursor; + yylval.regexp = 
strToCaseInsensitiveRE(Scanner_token(s)); + return STRING; } +#line 480 "scanner.c" +yy75: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy76; +yy76: if(yych <= '!'){ + if(yych == '\n') goto yy61; + goto yy75; + } else { + if(yych <= '"') goto yy78; + if(yych != '\\') goto yy75; + goto yy77; + } +yy77: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy75; +yy78: yych = *++YYCURSOR; + goto yy79; +yy79: +#line 142 "scanner.re" +{ s->cur = cursor; + yylval.regexp = strToRE(Scanner_token(s)); + return STRING; } +#line 505 "scanner.c" +yy80: yych = *++YYCURSOR; + goto yy81; +yy81: +#line 139 "scanner.re" +{ s->tok = cursor; + RETURN(0); } +#line 512 "scanner.c" +yy82: yych = *++YYCURSOR; + goto yy83; +yy83: +#line 136 "scanner.re" +{ depth = 1; + goto comment; } +#line 519 "scanner.c" +yy84: ++YYCURSOR; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + goto yy85; +yy85: if(yych <= '/'){ + if(yych == ',') goto yy88; + goto yy61; + } else { + if(yych <= '9') goto yy84; + if(yych != '}') goto yy61; + goto yy86; + } +yy86: yych = *++YYCURSOR; + goto yy87; +yy87: +#line 168 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = atoi((char *)s->tok+1); + RETURN(CLOSESIZE); } +#line 539 "scanner.c" +yy88: yych = *++YYCURSOR; + if(yych != '}') goto yy92; + goto yy89; +yy89: yych = *++YYCURSOR; + goto yy90; +yy90: +#line 176 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = -1; + RETURN(CLOSESIZE); } +#line 550 "scanner.c" +yy91: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy92; +yy92: if(yych <= '/') goto yy61; + if(yych <= '9') goto yy91; + if(yych != '}') goto yy61; + goto yy93; +yy93: yych = *++YYCURSOR; + goto yy94; +yy94: +#line 172 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, 
',')+1)); + RETURN(CLOSESIZE); } +#line 566 "scanner.c" +} +#line 201 "scanner.re" + + +code: + +#line 573 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy95; + ++YYCURSOR; +yy95: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '&'){ + if(yych <= '\n'){ + if(yych <= '\t') goto yy103; + goto yy101; + } else { + if(yych == '"') goto yy105; + goto yy103; + } + } else { + if(yych <= '{'){ + if(yych <= '\'') goto yy106; + if(yych <= 'z') goto yy103; + goto yy99; + } else { + if(yych != '}') goto yy103; + goto yy97; + } + } +yy97: yych = *++YYCURSOR; + goto yy98; +yy98: +#line 205 "scanner.re" +{ if(--depth == 0){ + s->cur = cursor; + yylval.token = Token_new(Scanner_token(s), s->tline); + return CODE; + } + goto code; } +#line 610 "scanner.c" +yy99: yych = *++YYCURSOR; + goto yy100; +yy100: +#line 211 "scanner.re" +{ ++depth; + goto code; } +#line 617 "scanner.c" +yy101: yych = *++YYCURSOR; + goto yy102; +yy102: +#line 213 "scanner.re" +{ if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); + s->pos = cursor; s->cline++; + goto code; + } +#line 626 "scanner.c" +yy103: yych = *++YYCURSOR; + goto yy104; +yy104: +#line 217 "scanner.re" +{ goto code; } +#line 632 "scanner.c" +yy105: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy104; + goto yy112; +yy106: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy104; + goto yy108; +yy107: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy108; +yy108: if(yych <= '&'){ + if(yych != '\n') goto yy107; + goto yy109; + } else { + if(yych <= '\'') goto yy103; + if(yych == '\\') goto yy110; + goto yy107; + } +yy109: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy104; + } +yy110: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy109; + goto yy107; +yy111: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy112; +yy112: if(yych <= '!'){ 
+ if(yych == '\n') goto yy109; + goto yy111; + } else { + if(yych <= '"') goto yy103; + if(yych != '\\') goto yy111; + goto yy113; + } +yy113: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy109; + goto yy111; +} +#line 218 "scanner.re" + + +comment: + +#line 685 "scanner.c" +{ + YYCTYPE yych; + goto yy114; + ++YYCURSOR; +yy114: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= ')'){ + if(yych == '\n') goto yy119; + goto yy121; + } else { + if(yych <= '*') goto yy116; + if(yych == '/') goto yy118; + goto yy121; + } +yy116: yych = *++YYCURSOR; + if(yych == '/') goto yy124; + goto yy117; +yy117: +#line 232 "scanner.re" +{ goto comment; } +#line 707 "scanner.c" +yy118: yych = *++YYCURSOR; + if(yych == '*') goto yy122; + goto yy117; +yy119: yych = *++YYCURSOR; + goto yy120; +yy120: +#line 228 "scanner.re" +{ if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->cline++; + goto comment; + } +#line 719 "scanner.c" +yy121: yych = *++YYCURSOR; + goto yy117; +yy122: yych = *++YYCURSOR; + goto yy123; +yy123: +#line 226 "scanner.re" +{ ++depth; + goto comment; } +#line 728 "scanner.c" +yy124: yych = *++YYCURSOR; + goto yy125; +yy125: +#line 222 "scanner.re" +{ if(--depth == 0) + goto scan; + else + goto comment; } +#line 737 "scanner.c" +} +#line 233 "scanner.re" + +} + +void +Scanner_fatal(Scanner *s, const char *msg) +{ + fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg); + exit(1); +} diff --git a/tools/re2c/code.c b/tools/re2c/code.c new file mode 100644 index 0000000..bd54baa --- /dev/null +++ b/tools/re2c/code.c @@ -0,0 +1,969 @@ +#ifdef _WIN32 +#include <windows.h> +#include <io.h> +#endif +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "tools/re2c/substr.h" +#include "tools/re2c/globals.h" +#include "tools/re2c/dfa.h" +#include "tools/re2c/parse.h" + +#ifdef _WIN32 +/* tmpfile() replacment for Windows. 
+ * + * On Windows tmpfile() creates the file in the root directory. This + * may fail due to unsufficient privileges. + */ +static FILE * +win32_tmpfile (void) +{ + DWORD path_len; + WCHAR path_name[MAX_PATH + 1]; + WCHAR file_name[MAX_PATH + 1]; + HANDLE handle; + int fd; + FILE *fp; + + path_len = GetTempPathW (MAX_PATH, path_name); + if (path_len <= 0 || path_len >= MAX_PATH) + return NULL; + + if (GetTempFileNameW (path_name, L"ps_", 0, file_name) == 0) + return NULL; + + handle = CreateFileW (file_name, + GENERIC_READ | GENERIC_WRITE, + 0, + NULL, + CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, + NULL); + if (handle == INVALID_HANDLE_VALUE) { + DeleteFileW (file_name); + return NULL; + } + + fd = _open_osfhandle((intptr_t) handle, 0); + if (fd < 0) { + CloseHandle (handle); + return NULL; + } + + fp = fdopen(fd, "w+b"); + if (fp == NULL) { + _close(fd); + return NULL; + } + + return fp; +} +#endif + +static void useLabel(size_t value) { + while (value >= vUsedLabelAlloc) { + vUsedLabels = realloc(vUsedLabels, vUsedLabelAlloc * 2); + if (!vUsedLabels) { + fputs("Out of memory.\n", stderr); + exit(EXIT_FAILURE); + } + memset(vUsedLabels + vUsedLabelAlloc, 0, vUsedLabelAlloc); + vUsedLabelAlloc *= 2; + } + vUsedLabels[value] = 1; +} + +/* there must be at least one span in list; all spans must cover + * same range + */ + +void Go_compact(Go *g){ + /* arrange so that adjacent spans have different targets */ + unsigned int i = 0, j; + for(j = 1; j < g->nSpans; ++j){ + if(g->span[j].to != g->span[i].to){ + ++i; g->span[i].to = g->span[j].to; + } + g->span[i].ub = g->span[j].ub; + } + g->nSpans = i + 1; +} + +void Go_unmap(Go *g, Go *base, State *x){ + Span *s = g->span, *b = base->span, *e = &b[base->nSpans]; + unsigned int lb = 0; + s->ub = 0; + s->to = NULL; + for(; b != e; ++b){ + if(b->to == x){ + if((s->ub - lb) > 1) + s->ub = b->ub; + } else { + if(b->to != s->to){ + if(s->ub){ + lb = s->ub; ++s; + } + s->to = b->to; + } + s->ub = b->ub; 
+ } + } + s->ub = e[-1].ub; ++s; + g->nSpans = s - g->span; +} + +static void doGen(Go *g, State *s, unsigned char *bm, unsigned char m){ + Span *b = g->span, *e = &b[g->nSpans]; + unsigned int lb = 0; + for(; b < e; ++b){ + if(b->to == s) + for(; lb < b->ub; ++lb) bm[lb] |= m; + lb = b->ub; + } +} +#if 0 +static void prt(FILE *o, Go *g, State *s){ + Span *b = g->span, *e = &b[g->nSpans]; + unsigned int lb = 0; + for(; b < e; ++b){ + if(b->to == s) + printSpan(o, lb, b->ub); + lb = b->ub; + } +} +#endif +static int matches(Go *g1, State *s1, Go *g2, State *s2){ + Span *b1 = g1->span, *e1 = &b1[g1->nSpans]; + unsigned int lb1 = 0; + Span *b2 = g2->span, *e2 = &b2[g2->nSpans]; + unsigned int lb2 = 0; + for(;;){ + for(; b1 < e1 && b1->to != s1; ++b1) lb1 = b1->ub; + for(; b2 < e2 && b2->to != s2; ++b2) lb2 = b2->ub; + if(b1 == e1) return b2 == e2; + if(b2 == e2) return 0; + if(lb1 != lb2 || b1->ub != b2->ub) return 0; + ++b1; ++b2; + } +} + +typedef struct BitMap { + Go *go; + State *on; + struct BitMap *next; + unsigned int i; + unsigned char m; +} BitMap; + +static BitMap *BitMap_find_go(Go*, State*); +static BitMap *BitMap_find(State*); +static void BitMap_gen(FILE *, unsigned int, unsigned int); +/* static void BitMap_stats(void);*/ +static BitMap *BitMap_new(Go*, State*); + +static BitMap *BitMap_first = NULL; + +BitMap * +BitMap_new(Go *g, State *x) +{ + BitMap *b = malloc(sizeof(BitMap)); + b->go = g; + b->on = x; + b->next = BitMap_first; + BitMap_first = b; + return b; +} + +BitMap * +BitMap_find_go(Go *g, State *x){ + BitMap *b; + for(b = BitMap_first; b; b = b->next){ + if(matches(b->go, b->on, g, x)) + return b; + } + return BitMap_new(g, x); +} + +BitMap * +BitMap_find(State *x){ + BitMap *b; + for(b = BitMap_first; b; b = b->next){ + if(b->on == x) + return b; + } + return NULL; +} + +void BitMap_gen(FILE *o, unsigned int lb, unsigned int ub){ + BitMap *b = BitMap_first; + if(b){ + unsigned int n = ub - lb; + unsigned int i; + unsigned char *bm = 
/*
 * Write the C code for action `a` to `o`.
 *
 * Every newline written here is mirrored by an increment of the global
 * output line counter `oline`, which is later printed in "#line"
 * directives.  `*readCh` is cleared whenever the emitted code has
 * already loaded the next input character into yych.
 *
 * Side effects beyond output: an ACCEPTACT that emits at least one
 * case sets the global bUsedYYAccept, which in turn makes SAVEMATCHACT
 * emit "yyaccept = N;".
 */
void
Action_emit(Action *a, FILE *o, int *readCh)
{
    int first = 1;
    unsigned int i;
    unsigned int back;

    switch (a->type) {
        case MATCHACT:
            /* Advance the cursor; states with a link must refill first. */
            if(a->state->link){
                fputs("\t++YYCURSOR;\n", o);
                need(o, a->state->depth, readCh);
#if 0
            } else if (!Action_readAhead(a)) {
                /* do not read next char if match */
                fputs("\t++YYCURSOR;\n", o);
                *readCh = 1;
#endif
            } else {
                fputs("\tyych = *++YYCURSOR;\n", o);
                *readCh = 0;
            }
            /* accounts for the single line written by either branch above */
            oline++;
            break;
        case ENTERACT:
            /* Same as MATCHACT, plus the DFA's entry label "yyN:". */
            if(a->state->link){
                fputs("\t++YYCURSOR;\n", o);
                fprintf(o, "yy%u:\n", a->d.label); oline+=2;
                need(o, a->state->depth, readCh);
            } else {
                /* we shouldn't need 'rule-following' protection here */
                fputs("\tyych = *++YYCURSOR;\n", o);
                fprintf(o, "yy%u:\n", a->d.label); oline+=2;
                *readCh = 0;
            }
            break;
        case SAVEMATCHACT:
            /* Remember a backtracking point: selector (if used) and YYMARKER. */
            if (bUsedYYAccept) {
                fprintf(o, "\tyyaccept = %u;\n", a->d.selector);
                oline++;
            }
            if(a->state->link){
                fputs("\tYYMARKER = ++YYCURSOR;\n", o); oline++;
                need(o, a->state->depth, readCh);
            } else {
                fputs("\tyych = *(YYMARKER = ++YYCURSOR);\n", o); oline++;
                *readCh = 0;
            }
            break;
        case MOVEACT:
            /* emits nothing */
            break;
        case ACCEPTACT:
            /*
             * Restore the cursor and dispatch on yyaccept.  Only rules whose
             * save slot was assigned (!= ~0u) get a case; the switch header
             * is written lazily on the first such rule.
             */
            for(i = 0; i < a->d.Accept.nRules; ++i)
                if(a->d.Accept.saves[i] != ~0u){
                    if(first){
                        first = 0;
                        bUsedYYAccept = 1;
                        fputs("\tYYCURSOR = YYMARKER;\n", o);
                        fputs("\tswitch(yyaccept){\n", o); oline+=2;
                    }
                    fprintf(o, "\tcase %u:", a->d.Accept.saves[i]);
                    genGoTo(o, a->state, a->d.Accept.rules[i], readCh, "\t");
                }
            if(!first) {
                fputs("\t}\n", o); oline++;
            }
            break;
        case RULEACT:
            /*
             * Emit the user's rule code.  When RegExp_fixedLength() reports a
             * usable non-zero length for the rule's ctx (i.e. not ~0u), the
             * cursor is moved back by that amount first.
             */
            back = RegExp_fixedLength(a->d.rule->d.RuleOp.ctx);
            if(back != ~0u && back > 0u)
                fprintf(o, "\tYYCURSOR -= %u;", back);
            fprintf(o, "\n"); oline++;
            line_source(o, a->d.rule->d.RuleOp.code->line);
            SubStr_out(&a->d.rule->d.RuleOp.code->text, o);
            fprintf(o, "\n"); oline++;
            /* switch "#line" back to the output file unless suppressed */
            if (!iFlag)
                fprintf(o, "#line %u \"%s\"\n", oline++, outputFileName);
            break;
    }
}
= malloc(sizeof(Action)); + a->type = ACCEPTACT; + a->state = x; + a->d.Accept.nRules = n; + a->d.Accept.saves = s; + a->d.Accept.rules = r; + x->action = a; + return a; +} + +static void doLinear(FILE *o, unsigned int i, Span *s, unsigned int n, + State *from, State *next, int *readCh){ + for(;;){ + State *bg = s[0].to; + while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1){ + if(s[1].to == next && n == 3){ + indent(o, i); + genIf(o, "!=", s[0].ub, readCh); + genGoTo(o, from, bg, readCh, "\t"); + indent(o, i); + genGoTo(o, from, next, readCh, "\t"); + return; + } else { + indent(o, i); + genIf(o, "==", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh, "\t"); + } + n -= 2; s += 2; + } + if(n == 1){ + indent(o, i); + genGoTo(o, from, s[0].to, readCh, "\t"); + return; + } else if(n == 2 && bg == next){ + indent(o, i); + genIf(o, ">=", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh, "\t"); + indent(o, i); + genGoTo(o, from, next, readCh, "\t"); + return; + } else { + indent(o, i); + genIf(o, "<=", s[0].ub - 1, readCh); + genGoTo(o, from, bg, readCh, "\t"); + n -= 1; s += 1; + } + } + indent(o, i); + genGoTo(o, from, next, readCh, "\t"); +} + +void +Go_genLinear(Go *g, FILE *o, State *from, State *next, int *readCh){ + doLinear(o, 0, g->span, g->nSpans, from, next, readCh); +} + +static void genCases(FILE *o, unsigned int lb, Span *s){ + if(lb < s->ub){ + for(;;){ + fputs("\tcase '", o); prtCh(o, lb); fputs("':", o); + if(++lb == s->ub) + break; + fputs("\n", o); oline++; + } + } +} + +void +Go_genSwitch(Go *g, FILE *o, State *from, State *next, int *readCh){ + if(g->nSpans <= 2){ + Go_genLinear(g, o, from, next, readCh); + } else { + State *def = g->span[g->nSpans-1].to; + Span **sP = malloc(sizeof(Span*)*(g->nSpans-1)), **r, **s, **t; + unsigned int i; + + t = &sP[0]; + for(i = 0; i < g->nSpans; ++i) + if(g->span[i].to != def) + *(t++) = &g->span[i]; + + if (dFlag) + fputs("\tYYDEBUG(-1, yych);\n", o); + +#if 0 + if (*readCh) { + fputs("\tswitch((yych 
= *YYCURSOR)) {\n", o); + *readCh = 0; + } else +#endif + fputs("\tswitch(yych){\n", o); + oline++; + while(t != &sP[0]){ + State *to; + r = s = &sP[0]; + if(*s == &g->span[0]) + genCases(o, 0, *s); + else + genCases(o, (*s)[-1].ub, *s); + to = (*s)->to; + while(++s < t){ + if((*s)->to == to) + genCases(o, (*s)[-1].ub, *s); + else + *(r++) = *s; + } + genGoTo(o, from, to, readCh, "\t"); + t = r; + } + fputs("\tdefault:", o); + genGoTo(o, from, def, readCh, "\t"); + fputs("\t}\n", o); oline++; + + free(sP); + } +} + +static void doBinary(FILE *o, unsigned int i, Span *s, unsigned int n, + State *from, State *next, int *readCh){ + if(n <= 4){ + doLinear(o, i, s, n, from, next, readCh); + } else { + unsigned int h = n/2; + indent(o, i); + genIf(o, "<=", s[h-1].ub - 1, readCh); + fputs("{\n", o); oline++; + doBinary(o, i+1, &s[0], h, from, next, readCh); + indent(o, i); fputs("\t} else {\n", o); oline++; + doBinary(o, i+1, &s[h], n - h, from, next, readCh); + indent(o, i); fputs("\t}\n", o); oline++; + } +} + +void +Go_genBinary(Go *g, FILE *o, State *from, State *next, int *readCh){ + doBinary(o, 0, g->span, g->nSpans, from, next, readCh); +} + +void +Go_genBase(Go *g, FILE *o, State *from, State *next, int *readCh){ + if(g->nSpans == 0) + return; + if(!sFlag){ + Go_genSwitch(g, o, from, next, readCh); + return; + } + if(g->nSpans > 8){ + Span *bot = &g->span[0], *top = &g->span[g->nSpans-1]; + unsigned int util; + if(bot[0].to == top[0].to){ + util = (top[-1].ub - bot[0].ub)/(g->nSpans - 2); + } else { + if(bot[0].ub > (top[0].ub - top[-1].ub)){ + util = (top[0].ub - bot[0].ub)/(g->nSpans - 1); + } else { + util = top[-1].ub/(g->nSpans - 1); + } + } + if(util <= 2){ + Go_genSwitch(g, o, from, next, readCh); + return; + } + } + if(g->nSpans > 5){ + Go_genBinary(g, o, from, next, readCh); + } else { + Go_genLinear(g, o, from, next, readCh); + } +} + +void +Go_genGoto(Go *g, FILE *o, State *from, State *next, int *readCh){ + unsigned int i; + if(bFlag){ + for(i = 0; i < 
g->nSpans; ++i){ + State *to = g->span[i].to; + if(to && to->isBase){ + BitMap *b = BitMap_find(to); + if(b && matches(b->go, b->on, g, to)){ + Go go; + go.span = malloc(sizeof(Span)*g->nSpans); + Go_unmap(&go, g, to); + fprintf(o, "\tif(yybm[%u+", b->i); +#if 0 + if (*readCh) + fputs("(yych = *YYCURSOR)", o); + else +#endif + fputs("yych", o); + fprintf(o, "] & %u) {\n", (unsigned int) b->m); oline++; + genGoTo(o, from, to, readCh, "\t\t"); + fputs("\t}\n", o); oline++; + Go_genBase(&go, o, from, next, readCh); + free(go.span); + return; + } + } + } + } + Go_genBase(g, o, from, next, readCh); +} + +void State_emit(State *s, FILE *o, int *readCh){ + if (vUsedLabels[s->label]) + fprintf(o, "yy%u:", s->label); + if (dFlag) + fprintf(o, "\n\tYYDEBUG(%u, *YYCURSOR);\n", s->label); + Action_emit(s->action, o, readCh); +} + +static unsigned int merge(Span *x0, State *fg, State *bg){ + Span *x = x0, *f = fg->go.span, *b = bg->go.span; + unsigned int nf = fg->go.nSpans, nb = bg->go.nSpans; + State *prev = NULL, *to; + /* NB: we assume both spans are for same range */ + for(;;){ + if(f->ub == b->ub){ + to = f->to == b->to? bg : f->to; + if(to == prev){ + --x; + } else { + x->to = prev = to; + } + x->ub = f->ub; + ++x; ++f; --nf; ++b; --nb; + if(nf == 0 && nb == 0) + return x - x0; + } + while(f->ub < b->ub){ + to = f->to == b->to? bg : f->to; + if(to == prev){ + --x; + } else { + x->to = prev = to; + } + x->ub = f->ub; + ++x; ++f; --nf; + } + while(b->ub < f->ub){ + to = b->to == f->to? 
bg : f->to; + if(to == prev){ + --x; + } else { + x->to = prev = to; + } + x->ub = b->ub; + ++x; ++b; --nb; + } + } +} + +const unsigned int cInfinity = ~0; + +typedef struct SCC { + State **top, **stk; +} SCC; + +static void SCC_init(SCC*, unsigned int); +static SCC *SCC_new(unsigned int); +static void SCC_destroy(SCC*); +static void SCC_delete(SCC*); +static void SCC_traverse(SCC*, State*); + +static void +SCC_init(SCC *s, unsigned int size) +{ + s->top = s->stk = malloc(sizeof(State*)*size); +} + +static SCC * +SCC_new(unsigned int size){ + SCC *s = malloc(sizeof(SCC)); + s->top = s->stk = malloc(sizeof(State*)*size); + return s; +} + +static void +SCC_destroy(SCC *s){ + free(s->stk); +} + +static void +SCC_delete(SCC *s){ + free(s->stk); + free(s); +} + +static void SCC_traverse(SCC *s, State *x){ + unsigned int k, i; + + *s->top = x; + k = ++s->top - s->stk; + x->depth = k; + for(i = 0; i < x->go.nSpans; ++i){ + State *y = x->go.span[i].to; + if(y){ + if(y->depth == 0) + SCC_traverse(s, y); + if(y->depth < x->depth) + x->depth = y->depth; + } + } + if(x->depth == k) + do { + (*--s->top)->depth = cInfinity; + (*s->top)->link = x; + } while(*s->top != x); +} + +static unsigned int maxDist(State *s){ + unsigned int mm = 0, i; + for(i = 0; i < s->go.nSpans; ++i){ + State *t = s->go.span[i].to; + if(t){ + unsigned int m = 1; + if(!t->link) { + if (t->depth == -1) + t->depth = maxDist(t); + m += t->depth; + } + if(m > mm) + mm = m; + } + } + return mm; +} + +static void calcDepth(State *head){ + State *t, *s; + for(s = head; s; s = s->next){ + if(s->link == s){ + unsigned int i; + for(i = 0; i < s->go.nSpans; ++i){ + t = s->go.span[i].to; + if(t && t->link == s) + goto inSCC; + } + s->link = NULL; + } else { + inSCC: + s->depth = maxDist(s); + } + } +} + +void DFA_findSCCs(DFA *d){ + SCC scc; + State *s; + + SCC_init(&scc, d->nStates); + for(s = d->head; s; s = s->next){ + s->depth = 0; + s->link = NULL; + } + + for(s = d->head; s; s = s->next) + if(!s->depth) + 
SCC_traverse(&scc, s); + + calcDepth(d->head); + + SCC_destroy(&scc); +} + +void DFA_split(DFA *d, State *s){ + State *move = State_new(); + Action_new_Move(move); + DFA_addState(d, &s->next, move); + move->link = s->link; + move->rule = s->rule; + move->go = s->go; + s->rule = NULL; + s->go.nSpans = 1; + s->go.span = malloc(sizeof(Span)); + s->go.span[0].ub = d->ubChar; + s->go.span[0].to = move; +} + +void DFA_emit(DFA *d, FILE *o){ + static unsigned int label = 0; + State *s; + unsigned int i, bitmap_brace = 0; + unsigned int nRules = 0; + unsigned int nSaves = 0; + unsigned int *saves; + unsigned int nOrgOline; + State **rules; + State *accept = NULL; + Span *span; + FILE *tmpo; + int hasFillLabels; + int maxFillIndexes, orgVFillIndexes; + unsigned int start_label; + + hasFillLabels = (0<=vFillIndexes); + if (hasFillLabels && label!=0) { + fputs("re2c : error : multiple /*!re2c blocks aren't supported when -f is specified\n", stderr); + exit(1); + } + + DFA_findSCCs(d); + d->head->link = d->head; + + maxFill = 1; + for(s = d->head; s; s = s->next) { + s->depth = maxDist(s); + if (maxFill < s->depth) + maxFill = s->depth; + if(s->rule && s->rule->d.RuleOp.accept >= nRules) + nRules = s->rule->d.RuleOp.accept + 1; + } + + saves = malloc(sizeof(unsigned int)*nRules); + memset(saves, ~0, (nRules)*sizeof(unsigned int)); + + /* mark backtracking points */ + for(s = d->head; s; s = s->next){ + RegExp *ignore = NULL;/*RuleOp*/ + if(s->rule){ + for(i = 0; i < s->go.nSpans; ++i) + if(s->go.span[i].to && !s->go.span[i].to->rule){ + free(s->action); + if(saves[s->rule->d.RuleOp.accept] == ~0u) + saves[s->rule->d.RuleOp.accept] = nSaves++; + Action_new_Save(s, saves[s->rule->d.RuleOp.accept]); + continue; + } + ignore = s->rule; + } + } + + /* insert actions */ + rules = malloc(sizeof(State*)*nRules); + memset(rules, 0, (nRules)*sizeof(State*)); + for(s = d->head; s; s = s->next){ + State *ow; + if(!s->rule){ + ow = accept; + } else { + if(!rules[s->rule->d.RuleOp.accept]){ 
+ State *n = State_new(); + Action_new_Rule(n, s->rule); + rules[s->rule->d.RuleOp.accept] = n; + DFA_addState(d, &s->next, n); + } + ow = rules[s->rule->d.RuleOp.accept]; + } + for(i = 0; i < s->go.nSpans; ++i) + if(!s->go.span[i].to){ + if(!ow){ + ow = accept = State_new(); + Action_new_Accept(accept, nRules, saves, rules); + DFA_addState(d, &s->next, accept); + } + s->go.span[i].to = ow; + } + } + + /* split ``base'' states into two parts */ + for(s = d->head; s; s = s->next){ + s->isBase = 0; + if(s->link){ + for(i = 0; i < s->go.nSpans; ++i){ + if(s->go.span[i].to == s){ + s->isBase = 1; + DFA_split(d, s); + if(bFlag) + BitMap_find_go(&s->next->go, s); + s = s->next; + break; + } + } + } + } + + /* find ``base'' state, if possible */ + span = malloc(sizeof(Span)*(d->ubChar - d->lbChar)); + for(s = d->head; s; s = s->next){ + if(!s->link){ + for(i = 0; i < s->go.nSpans; ++i){ + State *to = s->go.span[i].to; + if(to && to->isBase){ + unsigned int nSpans; + to = to->go.span[0].to; + nSpans = merge(span, s, to); + if(nSpans < s->go.nSpans){ + free(s->go.span); + s->go.nSpans = nSpans; + s->go.span = malloc(sizeof(Span)*nSpans); + memcpy(s->go.span, span, nSpans*sizeof(Span)); + } + break; + } + } + } + } + free(span); + + free(d->head->action); + + if(bFlag) { + fputs("{\n", o); + oline++; + bitmap_brace = 1; + BitMap_gen(o, d->lbChar, d->ubChar); + } + + bUsedYYAccept = 0; + + start_label = label; + + Action_new_Enter(d->head, label++); + + for(s = d->head; s; s = s->next) + s->label = label++; + + nOrgOline = oline; + maxFillIndexes = vFillIndexes; + orgVFillIndexes = vFillIndexes; +#ifdef _WIN32 + tmpo = win32_tmpfile(); +#else + tmpo = tmpfile(); +#endif + for(s = d->head; s; s = s->next){ + int readCh = 0; + State_emit(s, tmpo, &readCh); + Go_genGoto(&s->go, tmpo, s, s->next, &readCh); + } + fclose(tmpo); + maxFillIndexes = vFillIndexes; + vFillIndexes = orgVFillIndexes; + oline = nOrgOline; + + fputs("\n", o); + oline++; + if (!iFlag) + fprintf(o, "#line %u 
\"%s\"\n", oline++, outputFileName); + + if (!hasFillLabels) { + fputs("{\n\tYYCTYPE yych;\n", o); + oline += 2; + if (bUsedYYAccept) { + fputs("\tunsigned int yyaccept;\n", o); + oline++; + } + } else { + fputs("{\n\n", o); + oline += 2; + } + + if (!hasFillLabels) { + fprintf(o, "\tgoto yy%u;\n", start_label); + oline++; + useLabel(label); + } else { + int i; + fputs("\tswitch(YYGETSTATE()) {\n", o); + fputs("\t\tcase -1: goto yy0;\n", o); + + for (i=0; i<maxFillIndexes; ++i) + fprintf(o, "\t\tcase %u: goto yyFillLabel%u;\n", i, i); + + fputs("\t\tdefault: /* abort() */;\n", o); + fputs("\t}\n", o); + fputs("yyNext:\n", o); + + oline += maxFillIndexes; + oline += 5; + } + + for(s = d->head; s; s = s->next){ + int readCh = 0; + State_emit(s, o, &readCh); + Go_genGoto(&s->go, o, s, s->next, &readCh); + } + fputs("}\n", o); oline++; + if (bitmap_brace) { + fputs("}\n", o); + oline++; + } + + BitMap_first = NULL; + + free(saves); + free(rules); +} diff --git a/tools/re2c/dfa.c b/tools/re2c/dfa.c new file mode 100644 index 0000000..16509de --- /dev/null +++ b/tools/re2c/dfa.c @@ -0,0 +1,253 @@ +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include "tools/re2c/globals.h" +#include "tools/re2c/substr.h" +#include "tools/re2c/dfa.h" + +#define octCh(c) ('0' + c%8) + +void prtCh(FILE *o, unsigned char c){ + unsigned char oc = talx[c]; + switch(oc){ + case '\'': fputs("\\'", o); break; + case '\n': fputs("\\n", o); break; + case '\t': fputs("\\t", o); break; + case '\v': fputs("\\v", o); break; + case '\b': fputs("\\b", o); break; + case '\r': fputs("\\r", o); break; + case '\f': fputs("\\f", o); break; + case '\a': fputs("\\a", o); break; + case '\\': fputs("\\\\", o); break; + default: + if(isprint(oc)) + fputc(oc, o); + else + fprintf(o, "\\%c%c%c", octCh(c/64), octCh(c/8), octCh(c)); + } +} + +void printSpan(FILE *o, unsigned int lb, unsigned int ub){ + if(lb > ub) + fputc('*', o); + fputc('[', o); + if((ub - lb) == 1){ + prtCh(o, lb); + } else { + 
prtCh(o, lb); + fputc('-', o); + prtCh(o, ub-1); + } + fputc(']', o); +} + +unsigned int +Span_show(Span *s, FILE *o, unsigned int lb) +{ + if(s->to){ + printSpan(o, lb, s->ub); + fprintf(o, " %u; ", s->to->label); + } + return s->ub; +} + +void +State_out(FILE *o, const State *s){ + unsigned int lb, i; + fprintf(o, "state %u", s->label); + if(s->rule) + fprintf(o, " accepts %u", s->rule->d.RuleOp.accept); + fputs("\n", o); oline++; + lb = 0; + for(i = 0; i < s->go.nSpans; ++i) + lb = Span_show(&s->go.span[i], o, lb); +} + +void +DFA_out(FILE *o, const DFA *dfa){ + State *s; + for(s = dfa->head; s; s = s->next) { + State_out(o, s); + fputs("\n\n", o); oline+=2; + } +} + +State * +State_new(void) +{ + State *s = malloc(sizeof(State)); + s->label = 0; + s->rule = NULL; + s->next = NULL; + s->link = NULL; + s->depth = 0; + s->kCount = 0; + s->kernel = NULL; + s->isBase = 0; + s->action = NULL; + s->go.nSpans = 0; + s->go.span = NULL; + return s; +} + +void +State_delete(State *s) +{ + if (s->kernel) + free(s->kernel); + if (s->go.span) + free(s->go.span); + free(s); +} + +static Ins **closure(Ins **cP, Ins *i){ + while(!isMarked(i)){ + mark(i); + *(cP++) = i; + if(i->i.tag == FORK){ + cP = closure(cP, i + 1); + i = (Ins*) i->i.link; + } else if(i->i.tag == GOTO){ + i = (Ins*) i->i.link; + } else + break; + } + return cP; +} + +typedef struct GoTo { + Char ch; + void *to; +} GoTo; + +DFA * +DFA_new(Ins *ins, unsigned int ni, unsigned int lb, unsigned int ub, Char *rep) +{ + DFA *d = malloc(sizeof(DFA)); + Ins **work = malloc(sizeof(Ins*)*(ni+1)); + unsigned int nc = ub - lb; + GoTo *goTo = malloc(sizeof(GoTo)*nc); + Span *span = malloc(sizeof(Span)*nc); + + d->lbChar = lb; + d->ubChar = ub; + memset((char*) goTo, 0, nc*sizeof(GoTo)); + d->tail = &d->head; + d->head = NULL; + d->nStates = 0; + d->toDo = NULL; + DFA_findState(d, work, closure(work, &ins[0]) - work); + while(d->toDo){ + State *s = d->toDo; + + Ins **cP, **iP, *i; + unsigned int nGoTos = 0; + unsigned int 
j; +/* take s off the to-do list, then scan its kernel: CHAR instructions feed the goTo table, TERM instructions pick the rule with the smallest accept value */ + d->toDo = s->link; + s->rule = NULL; + for(iP = s->kernel; (i = *iP); ++iP){ + if(i->i.tag == CHAR){ + Ins *j2; + for(j2 = i + 1; j2 < (Ins*) i->i.link; ++j2){ + if(!(j2->c.link = goTo[j2->c.value - lb].to)) + goTo[nGoTos++].ch = j2->c.value; + goTo[j2->c.value - lb].to = j2; + } + } else if(i->i.tag == TERM){ + if(!s->rule || ((RegExp *)i->i.link)->d.RuleOp.accept < s->rule->d.RuleOp.accept) + s->rule = (RegExp *)i->i.link; + } + } +/* for each character recorded above, collect the closure of its chained instructions into work and replace the chain with the state DFA_findState returns */ + for(j = 0; j < nGoTos; ++j){ + GoTo *go = &goTo[goTo[j].ch - lb]; + i = (Ins*) go->to; + for(cP = work; i; i = (Ins*) i->c.link) + cP = closure(cP, i + i->c.bump); + go->to = DFA_findState(d, work, cP - work); + } +/* merge runs of consecutive characters that share a target into spans. NOTE(review): goTo is indexed by rep[j] here but by value - lb above - confirm how lb is meant to be handled */ + s->go.nSpans = 0; + for(j = 0; j < nc;){ + State *to = (State*) goTo[rep[j]].to; + while(++j < nc && goTo[rep[j]].to == to); + span[s->go.nSpans].ub = lb + j; + span[s->go.nSpans].to = to; + s->go.nSpans++; + } +/* reset the goTo targets used by this state before the next state is processed */ + for(j = nGoTos; j-- > 0;) + goTo[goTo[j].ch - lb].to = NULL; +/* give the state its own copy of the spans built in the scratch array */ + s->go.span = malloc(sizeof(Span)*s->go.nSpans); + memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span)); +/* attach a Match action to the state */ + Action_new_Match(s); + + } + free(work); + free(goTo); + free(span); + + return d; +} +/* DFA_delete: free every state on the list. The DFA struct itself is not freed here. */ +void +DFA_delete(DFA *d){ + State *s; + while((s = d->head)){ + d->head = s->next; + State_delete(s); + } +} +/* DFA_addState: give s the next label and link it in at *a; when a is the list tail, the tail moves to s->next. */ +void DFA_addState(DFA *d, State **a, State *s){ + s->label = d->nStates++; + s->next = *a; + *a = s; + if(a == d->tail) + d->tail = &s->next; +} +/* DFA_findState: reduce kernel to its CHAR and TERM instructions (unmarking the others), then return an existing state whose kernel has the same count and consists only of marked instructions, or create a new state and queue it on d->toDo. */ +State *DFA_findState(DFA *d, Ins **kernel, unsigned int kCount){ + Ins **cP, **iP, *i; + State *s; +/* NULL-terminate the kernel so the loops below can stop at the sentinel */ + kernel[kCount] = NULL; + + cP = kernel; + for(iP = kernel; (i = *iP); ++iP){ + if(i->i.tag == CHAR || i->i.tag == TERM){ + *cP++ = i; + } else { + unmark(i); + } + } + kCount = cP - kernel; + kernel[kCount] = NULL; +/* search the existing states for one with a matching kernel */ + for(s = d->head; s; s = s->next){ + if(s->kCount == kCount){ + for(iP = s->kernel; (i = *iP); ++iP) + if(!isMarked(i)) + goto nextState; + goto unmarkAll; + } + nextState:; + } +/* no match: create the state and append it at the list tail */ + s = State_new(); + DFA_addState(d, d->tail, s); + s->kCount = kCount; + 
s->kernel = malloc(sizeof(Ins*)*(kCount+1)); + memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*)); + s->link = d->toDo; + d->toDo = s; +/* both the found and the newly created case end here: clear the marks on the kernel instructions */ +unmarkAll: + for(iP = kernel; (i = *iP); ++iP) + unmark(i); + + return s; +} diff --git a/tools/re2c/dfa.h b/tools/re2c/dfa.h new file mode 100644 index 0000000..da4d673 --- /dev/null +++ b/tools/re2c/dfa.h @@ -0,0 +1,173 @@ +#ifndef re2c_dfa_h +#define re2c_dfa_h + +#include <stdio.h> +#include "tools/re2c/re.h" + +extern void prtCh(FILE *, unsigned char); +extern void printSpan(FILE *, unsigned int, unsigned int); + +struct DFA; +struct State; +/* ActionType: tag stored in Action.type; the Action_new_* constructors set it and, where needed, fill the matching member of Action.d */ +typedef enum { + MATCHACT = 1, + ENTERACT, + SAVEMATCHACT, + MOVEACT, + ACCEPTACT, + RULEACT +} ActionType; +/* Action: an action attached to a state; state points back at the owning State and d carries the per-type data */ +typedef struct Action { + struct State *state; + ActionType type; + union { + /* data for Enter */ + unsigned int label; + /* data for SaveMatch */ + unsigned int selector; + /* data for Accept */ + struct { + unsigned int nRules; + unsigned int *saves; + struct State **rules; + } Accept; + /* data for Rule */ + RegExp *rule; /* RuleOp */ + } d; +} Action; + +void Action_emit(Action*, FILE *, int *); +/* Span: a target state together with ub, the upper bound handed to printSpan for the character range */ +typedef struct Span { + unsigned int ub; + struct State *to; +} Span; + +unsigned int Span_show(Span*, FILE *, unsigned int); +/* Go: the array of nSpans spans leaving a state */ +typedef struct Go { + unsigned int nSpans; + Span *span; +} Go; +/* State: one DFA state; next links the state list and link chains the to-do list used by DFA_new and DFA_findState */ +typedef struct State { + unsigned int label; + RegExp *rule; /* RuleOp */ + struct State *next; + struct State *link; + unsigned int depth; /* for finding SCCs */ + unsigned int kCount; + Ins **kernel; + unsigned int isBase:1; + Go go; + Action *action; +} State; + +void Go_genGoto(Go*, FILE *, State*, State*, int*); +void Go_genBase(Go*, FILE *, State*, State*, int*); +void Go_genLinear(Go*, FILE *, State*, State*, int*); +void Go_genBinary(Go*, FILE *, State*, State*, int*); +void Go_genSwitch(Go*, FILE *, State*, State*, int*); +void Go_compact(Go*); +void Go_unmap(Go*, Go*, State*); + +State *State_new(void); +void State_delete(State*); +void State_emit(State*, FILE *, int *); 
+void State_out(FILE *, const State*); +/* DFA: lbChar and ubChar hold the character bounds given to DFA_new, nStates counts the labels handed out, head and tail delimit the state list, and toDo chains states that DFA_new still has to process */ +typedef struct DFA { + unsigned int lbChar; + unsigned int ubChar; + unsigned int nStates; + State *head, **tail; + State *toDo; +} DFA; + +DFA *DFA_new(Ins*, unsigned int, unsigned int, unsigned int, Char*); +void DFA_delete(DFA*); +void DFA_addState(DFA*, State**, State*); +State *DFA_findState(DFA*, Ins**, unsigned int); +void DFA_split(DFA*, State*); + +void DFA_findSCCs(DFA*); +void DFA_emit(DFA*, FILE *); +void DFA_out(FILE *, const DFA*); +/* Action_new_Match: allocate a MATCHACT action for s and install it as s->action. NOTE(review): this and the constructors below use the malloc result unchecked and overwrite s->action without freeing any previous action here. */ +static Action * +Action_new_Match(State *s) +{ + Action *a = malloc(sizeof(Action)); + a->type = MATCHACT; + a->state = s; + s->action = a; + return a; +} +/* Action_new_Enter: allocate an ENTERACT action carrying label l and install it as s->action. */ +static Action * +Action_new_Enter(State *s, unsigned int l) +{ + Action *a = malloc(sizeof(Action)); + a->type = ENTERACT; + a->state = s; + a->d.label = l; + s->action = a; + return a; +} +/* Action_new_Save: allocate a SAVEMATCHACT action carrying selector i and install it as s->action. */ +static Action * +Action_new_Save(State *s, unsigned int i) +{ + Action *a = malloc(sizeof(Action)); + a->type = SAVEMATCHACT; + a->state = s; + a->d.selector = i; + s->action = a; + return a; +} +/* Action_new_Move: allocate a MOVEACT action and install it as s->action. */ +static Action * +Action_new_Move(State *s) +{ + Action *a = malloc(sizeof(Action)); + a->type = MOVEACT; + a->state = s; + s->action = a; + return a; +} + +Action *Action_new_Accept(State*, unsigned int, unsigned int*, State**); +/* Action_new_Rule: allocate a RULEACT action pointing at rule r and install it as s->action. */ +static Action * +Action_new_Rule(State *s, RegExp *r) /* RuleOp */ +{ + Action *a = malloc(sizeof(Action)); + a->type = RULEACT; + a->state = s; + a->d.rule = r; + s->action = a; + return a; +} +/* Action_isRule: nonzero when a is a RULEACT action. */ +static int +Action_isRule(Action *a) +{ + return a->type == RULEACT; +} +/* Action_isMatch: nonzero when a is a MATCHACT action. */ +static int +Action_isMatch(Action *a) +{ + return a->type == MATCHACT; +} +/* Action_readAhead: nonzero for every non-Match action; for a Match action, nonzero only when its state has a next state whose action is not a Rule action. */ +static int +Action_readAhead(Action *a) +{ + return !Action_isMatch(a) || + (a->state && a->state->next && !Action_isRule(a->state->next->action)); +} + +#endif diff --git a/tools/re2c/doc/loplas.ps.gz b/tools/re2c/doc/loplas.ps.gz Binary files differnew file mode 100644 index 0000000..d1a9191 --- /dev/null +++ b/tools/re2c/doc/loplas.ps.gz diff --git 
a/tools/re2c/doc/sample.bib b/tools/re2c/doc/sample.bib new file mode 100644 index 0000000..1f34ab1 --- /dev/null +++ b/tools/re2c/doc/sample.bib @@ -0,0 +1,48 @@ +@Article{Bumbulis94, + author = {Peter Bumbulis and Donald D. Cowan}, + title = {RE2C -- A More Versatile Scanner Generator}, + journal = "ACM Letters on Programming Languages and Systems", + volume = 2, + number = "1--4", + year = 1994, + abstract = { + It is usually claimed that lexical analysis routines are still coded by + hand, despite the widespread availability of scanner generators, for + efficiency reasons. While efficiency is a consideration, there exist + freely available scanner generators such as GLA \cite{Gray88} that can + generate scanners that are faster than most hand-coded ones. However, + most generated scanners are tailored for a particular environment, and + retargetting these scanners to other environments, if possible, is + usually complex enough to make a hand-coded scanner more appealing. In + this paper we describe RE2C, a scanner generator that not only generates + scanners which are faster (and usually smaller) than those produced by + any other scanner generator known to the authors, including GLA, but + also adapt easily to any environment. + } +} +@Article{Gray88, + author = {Robert W. Gray}, + title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That + Programmers Can Use}, + journal = {USENIX Conference Proceedings}, + year = {1988}, + month = {June}, + pages = {147-160}, + abstract = {Writing an efficient lexical analyzer for even a simple + language is not a trivial task, and should not be done by hand. We + describe GLA, a tool that generates very efficient scanners. These + scanners do not use the conventional transition matrix, but instead + use a few 128 element vectors. Scanning time is only slightly + greater than the absolute minimum --- the time it takes to look at + each character in a file. 
The GLA language allows simple, concise + specification of scanners. Augmenting regular expressions with + auxiliary scanners easily handles nasty problems such as C comments + and C literal constants. We formalize the connection between token + scanning and token processing by associating a processor with + appropriate patterns. A library of canned descriptions simplifies the + specification of commonly used language pieces --- such as, + C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully + tuned lexical analysis support modules are provided for error + handling, input buffering, storing identifiers in hash tables and + manipulating denotations.} +} diff --git a/tools/re2c/examples/basemmap.c b/tools/re2c/examples/basemmap.c new file mode 100644 index 0000000..3e5b037 --- /dev/null +++ b/tools/re2c/examples/basemmap.c @@ -0,0 +1,26 @@ +#include <stdlib.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <string.h> + +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif + +volatile char ch; + +main(){ + struct stat statbuf; + uchar *buf; + fstat(0, &statbuf); + buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE, + 0, 0); + if(buf != (uchar*)(-1)){ + uchar *cur, *lim = &buf[statbuf.st_size]; + for(cur = buf; buf != lim; ++cur){ + ch = *cur; + } + munmap(buf, statbuf.st_size); + } +} diff --git a/tools/re2c/examples/c.re b/tools/re2c/examples/c.re new file mode 100644 index 0000000..419964f --- /dev/null +++ b/tools/re2c/examples/c.re @@ -0,0 +1,272 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 
277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; +/* fill: refill the scanner buffer unless end of input was already seen. The bytes from tok to lim are slid down to bot, the buffer is replaced by a larger one when fewer than BSIZE bytes are free after lim, and up to BSIZE bytes are read from s->fd. A read that returns fewer than BSIZE bytes (or fails) ends the input: a newline sentinel is stored after the data and s->eof is set just past it. Returns cursor adjusted to the moved data. */ +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memmove(s->bot, s->tok, s->lim - s->tok); /* same buffer: source and destination can overlap */ + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + if(cnt == (uint) -1) cnt = 0; /* read() failed: treat it as end of input instead of indexing with -1 */ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; 
+std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +H = [a-fA-F0-9]; +E = [Ee] [+-]? D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); +*/ + +/*!re2c + "/*" { goto comment; } + + "auto" { RET(AUTO); } + "break" { RET(BREAK); } + "case" { RET(CASE); } + "char" { RET(CHAR); } + "const" { RET(CONST); } + "continue" { RET(CONTINUE); } + "default" { RET(DEFAULT); } + "do" { RET(DO); } + "double" { RET(DOUBLE); } + "else" { RET(ELSE); } + "enum" { RET(ENUM); } + "extern" { RET(EXTERN); } + "float" { RET(FLOAT); } + "for" { RET(FOR); } + "goto" { RET(GOTO); } + "if" { RET(IF); } + "int" { RET(INT); } + "long" { RET(LONG); } + "register" { RET(REGISTER); } + "return" { RET(RETURN); } + "short" { RET(SHORT); } + "signed" { RET(SIGNED); } + "sizeof" { RET(SIZEOF); } + "static" { RET(STATIC); } + "struct" { RET(STRUCT); } + "switch" { RET(SWITCH); } + "typedef" { RET(TYPEDEF); } + "union" { RET(UNION); } + "unsigned" { RET(UNSIGNED); } + "void" { RET(VOID); } + "volatile" { RET(VOLATILE); } + "while" { RET(WHILE); } + + L (L|D)* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\[\n\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\[\n\\"])* ["]) + { RET(SCON); } + + "..." 
{ RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" { RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" { RET('?'); } + + + [ \t\v\f]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +main(){ + Scanner in; + int t; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while((t = scan(&in)) != EOI){ +/* + printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); + printf("%d\n", t); +*/ + } + close(in.fd); +} diff --git a/tools/re2c/examples/cmmap.re b/tools/re2c/examples/cmmap.re new file mode 100644 index 0000000..bc4d498 --- /dev/null +++ b/tools/re2c/examples/cmmap.re @@ -0,0 +1,267 @@ +#include <stdlib.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <string.h> + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define 
BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int unint; +typedef unsigned char uchar; + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + uchar *tok, *ptr, *cur, *pos, *lim, *eof; + unint line; +} Scanner; +/* fill: on the first call (s->eof still NULL) copy the bytes from tok to lim into a fresh heap buffer with one spare byte, store a newline sentinel after them and set s->eof just past it; cursor, pos, ptr, lim and tok are rebased onto the copy. Later calls return cursor unchanged. NOTE(review): the malloc result is unchecked and the buffer is not freed in this function. */ +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + unint cnt = s->lim - s->tok; + uchar *buf = malloc((cnt + 1)*sizeof(uchar)); + memcpy(buf, s->tok, cnt); + cursor = &buf[cursor - s->tok]; + s->pos = &buf[s->pos - s->tok]; + s->ptr = &buf[s->ptr - s->tok]; + s->lim = &buf[cnt]; + s->eof = s->lim; *(s->eof)++ = '\n'; + s->tok = buf; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +H = [a-fA-F0-9]; +E = [Ee] [+-]? 
D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); +*/ + +/*!re2c + "/*" { goto comment; } + + "auto" { RET(AUTO); } + "break" { RET(BREAK); } + "case" { RET(CASE); } + "char" { RET(CHAR); } + "const" { RET(CONST); } + "continue" { RET(CONTINUE); } + "default" { RET(DEFAULT); } + "do" { RET(DO); } + "double" { RET(DOUBLE); } + "else" { RET(ELSE); } + "enum" { RET(ENUM); } + "extern" { RET(EXTERN); } + "float" { RET(FLOAT); } + "for" { RET(FOR); } + "goto" { RET(GOTO); } + "if" { RET(IF); } + "int" { RET(INT); } + "long" { RET(LONG); } + "register" { RET(REGISTER); } + "return" { RET(RETURN); } + "short" { RET(SHORT); } + "signed" { RET(SIGNED); } + "sizeof" { RET(SIZEOF); } + "static" { RET(STATIC); } + "struct" { RET(STRUCT); } + "switch" { RET(SWITCH); } + "typedef" { RET(TYPEDEF); } + "union" { RET(UNION); } + "unsigned" { RET(UNSIGNED); } + "void" { RET(VOID); } + "volatile" { RET(VOLATILE); } + "while" { RET(WHILE); } + + L (L|D)* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\[\n\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\[\n\\"])* ["]) + { RET(SCON); } + + "..." { RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" 
{ RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" { RET('?'); } + + + [ \t\v\f]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif + +main(){ + Scanner in; + struct stat statbuf; + uchar *buf; + fstat(0, &statbuf); + buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE, + 0, 0); + if(buf != (uchar*)(-1)){ + int t; + in.lim = &(in.cur = buf)[statbuf.st_size]; + in.pos = NULL; + in.eof = NULL; + while((t = scan(&in)) != EOI){ +/* + printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); + printf("%d\n", t); +*/ + } + munmap(buf, statbuf.st_size); + } +} diff --git a/tools/re2c/examples/cnokw.re b/tools/re2c/examples/cnokw.re new file mode 100644 index 0000000..bdc1279 --- /dev/null +++ b/tools/re2c/examples/cnokw.re @@ -0,0 +1,239 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define 
LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; +/* fill: refill the scanner buffer unless end of input was already seen. The bytes from tok to lim are slid down to bot, the buffer is replaced by a larger one when fewer than BSIZE bytes are free after lim, and up to BSIZE bytes are read from s->fd. A read that returns fewer than BSIZE bytes (or fails) ends the input: a newline sentinel is stored after the data and s->eof is set just past it. Returns cursor adjusted to the moved data. */ +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memmove(s->bot, s->tok, s->lim - s->tok); /* same buffer: source and destination can overlap */ + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + if(cnt == (uint) -1) cnt = 0; /* read() failed: treat it as end of input instead of indexing with -1 */ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +H = [a-fA-F0-9]; +E = [Ee] [+-]? 
D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); +*/ + +/*!re2c + "/*" { goto comment; } + + L (L|D)* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\[\n\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\[\n\\"])* ["]) + { RET(SCON); } + + "..." { RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" { RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" 
{ RET('?'); } + + + [ \t\v\f]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +main(){ + Scanner in; + int t; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while((t = scan(&in)) != EOI){ +/* + printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); + printf("%d\n", t); +*/ + } + close(in.fd); +} diff --git a/tools/re2c/examples/cunroll.re b/tools/re2c/examples/cunroll.re new file mode 100644 index 0000000..dd9d805 --- /dev/null +++ b/tools/re2c/examples/cunroll.re @@ -0,0 +1,258 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define 
VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; +/* fill: refill the scanner buffer unless end of input was already seen. The bytes from tok to lim are slid down to bot, the buffer is replaced by a larger one when fewer than BSIZE bytes are free after lim, and up to BSIZE bytes are read from s->fd. A read that returns fewer than BSIZE bytes (or fails) ends the input: a newline sentinel is stored after the data and s->eof is set just past it. Returns cursor adjusted to the moved data. */ +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memmove(s->bot, s->tok, s->lim - s->tok); /* same buffer: source and destination can overlap */ + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + if(cnt == (uint) -1) cnt = 0; /* read() failed: treat it as end of input instead of indexing with -1 */ s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +I = L|D; +H = [a-fA-F0-9]; +E = [Ee] [+-]? D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); +X = any\[*/]; +*/ + +/*!re2c + "/*" { goto comment; } + + + L { RET(ID); } + L I { RET(ID); } + L I I { RET(ID); } + L I I I { RET(ID); } + L I I I I { RET(ID); } + L I I I I I { RET(ID); } + L I I I I I I { RET(ID); } + L I I I I I I I { RET(ID); } + L I* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\[\n\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\[\n\\"])* ["]) + { RET(SCON); } + + "..." 
{ RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" { RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" { RET('?'); } + + + [ \t\v\f]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + X { goto comment; } + X X { goto comment; } + X X X { goto comment; } + X X X X { goto comment; } + X X X X X { goto comment; } + X X X X X X { goto comment; } + X X X X X X X { goto comment; } + X X X X X X X X { goto comment; } + any { goto comment; } +*/ +} + +main(){ + Scanner in; + int t; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while((t = scan(&in)) != EOI){ +/* + printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); + printf("%d\n", t); +*/ + } + close(in.fd); +} diff --git a/tools/re2c/examples/modula.re b/tools/re2c/examples/modula.re new file mode 100644 index 0000000..0468ba4 --- /dev/null +++ b/tools/re2c/examples/modula.re @@ -0,0 +1,202 
@@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; + +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; + uint depth; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +digit = [0-9]; +letter = [a-zA-Z]; +*/ + +/*!re2c + "(*" { depth = 1; goto comment; } + + digit + {RETURN(1);} + digit + / ".." {RETURN(1);} + [0-7] + "B" {RETURN(2);} + [0-7] + "C" {RETURN(3);} + digit [0-9A-F] * "H" {RETURN(4);} + digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);} + ['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);} + + "#" {RETURN(7);} + "&" {RETURN(8);} + "(" {RETURN(9);} + ")" {RETURN(10);} + "*" {RETURN(11);} + "+" {RETURN(12);} + "," {RETURN(13);} + "-" {RETURN(14);} + "." {RETURN(15);} + ".." 
{RETURN(16);} + "/" {RETURN(17);} + ":" {RETURN(18);} + ":=" {RETURN(19);} + ";" {RETURN(20);} + "<" {RETURN(21);} + "<=" {RETURN(22);} + "<>" {RETURN(23);} + "=" {RETURN(24);} + ">" {RETURN(25);} + ">=" {RETURN(26);} + "[" {RETURN(27);} + "]" {RETURN(28);} + "^" {RETURN(29);} + "{" {RETURN(30);} + "|" {RETURN(31);} + "}" {RETURN(32);} + "~" {RETURN(33);} + + "AND" {RETURN(34);} + "ARRAY" {RETURN(35);} + "BEGIN" {RETURN(36);} + "BY" {RETURN(37);} + "CASE" {RETURN(38);} + "CONST" {RETURN(39);} + "DEFINITION" {RETURN(40);} + "DIV" {RETURN(41);} + "DO" {RETURN(42);} + "ELSE" {RETURN(43);} + "ELSIF" {RETURN(44);} + "END" {RETURN(45);} + "EXIT" {RETURN(46);} + "EXPORT" {RETURN(47);} + "FOR" {RETURN(48);} + "FROM" {RETURN(49);} + "IF" {RETURN(50);} + "IMPLEMENTATION" {RETURN(51);} + "IMPORT" {RETURN(52);} + "IN" {RETURN(53);} + "LOOP" {RETURN(54);} + "MOD" {RETURN(55);} + "MODULE" {RETURN(56);} + "NOT" {RETURN(57);} + "OF" {RETURN(58);} + "OR" {RETURN(59);} + "POINTER" {RETURN(60);} + "PROCEDURE" {RETURN(61);} + "QUALIFIED" {RETURN(62);} + "RECORD" {RETURN(63);} + "REPEAT" {RETURN(64);} + "RETURN" {RETURN(65);} + "SET" {RETURN(66);} + "THEN" {RETURN(67);} + "TO" {RETURN(68);} + "TYPE" {RETURN(69);} + "UNTIL" {RETURN(70);} + "VAR" {RETURN(71);} + "WHILE" {RETURN(72);} + "WITH" {RETURN(73);} + + letter (letter | digit) * {RETURN(74);} + + [ \t]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ +comment: +/*!re2c + "*)" + { + if(--depth == 0) + goto std; + else + goto comment; + } + "(*" { ++depth; goto comment; } + "\n" + { + if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +/* +void putStr(FILE *o, char *s, uint l){ + while(l-- > 0) + putc(*s++, o); +} +*/ + +main(){ + Scanner in; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while(scan(&in)){ +/* + 
putc('<', stdout); + putStr(stdout, (char*) in.tok, in.cur - in.tok); + putc('>', stdout); + putc('\n', stdout); +*/ + } +} diff --git a/tools/re2c/examples/repeater.re b/tools/re2c/examples/repeater.re new file mode 100644 index 0000000..f84b5c7 --- /dev/null +++ b/tools/re2c/examples/repeater.re @@ -0,0 +1,42 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define RET(n) printf("%d\n", n); return n /* print the rule number on its own line, then return it (expands to two statements) */ + +int scan(char *s, int l){ /* match the l bytes starting at s against the re2c rules below; a matching rule prints and returns its number through RET */ +char *p = s; +char *q; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l) +#define YYMARKER q +#define YYFILL(n) /* expands to nothing: the input buffer is never refilled */ +/*!re2c + 'a'{1}"\n" {RET(1);} + 'a'{2,3}"\n" {RET(2);} + 'a'{4,}"\n" {RET(3);} + 'a'{6}"\n" {RET(4);} + [^aq]|"\n" {RET(0);} +*/ +} + +#define do_scan(str) scan(str, strlen(str)) /* scan a string, passing its length without the terminating NUL */ + +main() /* run scan() over a fixed list of sample inputs; presumably exercises the repeat-count rules, including mixed-case input - TODO confirm against re2c's quoting rules */ +{ + do_scan("a\n"); + do_scan("aa\n"); + do_scan("aaa\n"); + do_scan("aaaa\n"); + do_scan("A\n"); + do_scan("AA\n"); + do_scan("aAa\n"); + do_scan("AaaA\n"); + do_scan("Q"); + do_scan("AaaAa\n"); + do_scan("AaaAaA\n"); + do_scan("A"); + do_scan("\n"); + do_scan("0"); +} diff --git a/tools/re2c/examples/rexx/README b/tools/re2c/examples/rexx/README new file mode 100644 index 0000000..2af0178 --- /dev/null +++ b/tools/re2c/examples/rexx/README @@ -0,0 +1 @@ +Replacement modules for an existing REXX interpreter. Not standalone. 
diff --git a/tools/re2c/examples/rexx/rexx.l b/tools/re2c/examples/rexx/rexx.l new file mode 100644 index 0000000..b74741d --- /dev/null +++ b/tools/re2c/examples/rexx/rexx.l @@ -0,0 +1,319 @@ +#include "scanio.h" +#include "scanner.h" + +#define CURSOR ch +#define LOADCURSOR ch = *cursor; +#define ADVANCE cursor++; +#define BACK(n) cursor -= (n); +#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);} +#define MARK(n) ScanCB.ptr = cursor; sel = (n); +#define REVERT cursor = ScanCB.ptr; +#define MARKER sel + +#define RETURN(i) {ScanCB.cur = cursor; return i;} + +int ScanToken(){ + uchar *cursor = ScanCB.cur; + unsigned sel; + uchar ch; + ScanCB.tok = cursor; + ScanCB.eot = NULL; +/*!re2c +all = [\000-\377]; +eof = [\000]; +any = all\eof; +letter = [a-z]|[A-Z]; +digit = [0-9]; +symchr = letter|digit|[.!?_]; +const = (digit|[.])symchr*([eE][+-]?digit+)?; +simple = (symchr\(digit|[.]))(symchr\[.])*; +stem = simple [.]; +symbol = symchr*; +sqstr = ['] ((any\['\n])|(['][']))* [']; +dqstr = ["] ((any\["\n])|(["]["]))* ["]; +str = sqstr|dqstr; +ob = [ \t]*; +not = [\\~]; +A = [aA]; +B = [bB]; +C = [cC]; +D = [dD]; +E = [eE]; +F = [fF]; +G = [gG]; +H = [hH]; +I = [iI]; +J = [jJ]; +K = [kK]; +L = [lL]; +M = [mM]; +N = [nN]; +O = [oO]; +P = [pP]; +Q = [qQ]; +R = [rR]; +S = [sS]; +T = [tT]; +U = [uU]; +V = [vV]; +W = [wW]; +X = [xX]; +Y = [yY]; +Z = [zZ]; +*/ + +scan: +/*!re2c +"\n" + { + ++(ScanCB.lineNum); + ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); + RETURN(SU_EOL); + } +"|" ob "|" + { RETURN(OP_CONCAT); } +"+" + { RETURN(OP_PLUS); } +"-" + { RETURN(OP_MINUS); } +"*" + { RETURN(OP_MULT); } +"/" + { RETURN(OP_DIV); } +"%" + { RETURN(OP_IDIV); } +"/" ob "/" + { RETURN(OP_REMAIN); } +"*" ob "*" + { RETURN(OP_POWER); } +"=" + { RETURN(OP_EQUAL); } +not ob "=" | "<" ob ">" | ">" ob "<" + { RETURN(OP_EQUAL_N); } +">" + { RETURN(OP_GT); } +"<" + { RETURN(OP_LT); } +">" ob "=" | not ob "<" + { RETURN(OP_GE); } +"<" ob "=" | not ob ">" + { 
RETURN(OP_LE); } +"=" ob "=" + { RETURN(OP_EQUAL_EQ); } +not ob "=" ob "=" + { RETURN(OP_EQUAL_EQ_N); } +">" ob ">" + { RETURN(OP_GT_STRICT); } +"<" ob "<" + { RETURN(OP_LT_STRICT); } +">" ob ">" ob "=" | not ob "<" ob "<" + { RETURN(OP_GE_STRICT); } +"<" ob "<" ob "=" | not ob ">" ob ">" + { RETURN(OP_LE_STRICT); } +"&" + { RETURN(OP_AND); } +"|" + { RETURN(OP_OR); } +"&" ob "&" + { RETURN(OP_XOR); } +not + { RETURN(OP_NOT); } + +":" + { RETURN(SU_COLON); } +"," + { RETURN(SU_COMMA); } +"(" + { RETURN(SU_POPEN); } +")" + { RETURN(SU_PCLOSE); } +";" + { RETURN(SU_EOC); } + +A D D R E S S + { RETURN(RX_ADDRESS); } +A R G + { RETURN(RX_ARG); } +C A L L + { RETURN(RX_CALL); } +D O + { RETURN(RX_DO); } +D R O P + { RETURN(RX_DROP); } +E L S E + { RETURN(RX_ELSE); } +E N D + { RETURN(RX_END); } +E X I T + { RETURN(RX_EXIT); } +I F + { RETURN(RX_IF); } +I N T E R P R E T + { RETURN(RX_INTERPRET); } +I T E R A T E + { RETURN(RX_ITERATE); } +L E A V E + { RETURN(RX_LEAVE); } +N O P + { RETURN(RX_NOP); } +N U M E R I C + { RETURN(RX_NUMERIC); } +O P T I O N S + { RETURN(RX_OPTIONS); } +O T H E R W I S E + { RETURN(RX_OTHERWISE); } +P A R S E + { RETURN(RX_PARSE); } +P R O C E D U R E + { RETURN(RX_PROCEDURE); } +P U L L + { RETURN(RX_PULL); } +P U S H + { RETURN(RX_PUSH); } +Q U E U E + { RETURN(RX_QUEUE); } +R E T U R N + { RETURN(RX_RETURN); } +S A Y + { RETURN(RX_SAY); } +S E L E C T + { RETURN(RX_SELECT); } +S I G N A L + { RETURN(RX_SIGNAL); } +T H E N + { RETURN(RX_THEN); } +T R A C E + { RETURN(RX_TRACE); } +W H E N + { RETURN(RX_WHEN); } +O F F + { RETURN(RXS_OFF); } +O N + { RETURN(RXS_ON); } +B Y + { RETURN(RXS_BY); } +D I G I T S + { RETURN(RXS_DIGITS); } +E N G I N E E R I N G + { RETURN(RXS_ENGINEERING); } +E R R O R + { RETURN(RXS_ERROR); } +E X P O S E + { RETURN(RXS_EXPOSE); } +F A I L U R E + { RETURN(RXS_FAILURE); } +F O R + { RETURN(RXS_FOR); } +F O R E V E R + { RETURN(RXS_FOREVER); } +F O R M + { RETURN(RXS_FORM); } +F U Z Z + { RETURN(RXS_FUZZ); } +H A 
L T + { RETURN(RXS_HALT); } +L I N E I N + { RETURN(RXS_LINEIN); } +N A M E + { RETURN(RXS_NAME); } +N O T R E A D Y + { RETURN(RXS_NOTREADY); } +N O V A L U E + { RETURN(RXS_NOVALUE); } +S C I E N T I F I C + { RETURN(RXS_SCIENTIFIC); } +S O U R C E + { RETURN(RXS_SOURCE); } +S Y N T A X + { RETURN(RXS_SYNTAX); } +T O + { RETURN(RXS_TO); } +U N T I L + { RETURN(RXS_UNTIL); } +U P P E R + { RETURN(RXS_UPPER); } +V A L U E + { RETURN(RXS_VALUE); } +V A R + { RETURN(RXS_VAR); } +V E R S I O N + { RETURN(RXS_VERSION); } +W H I L E + { RETURN(RXS_WHILE); } +W I T H + { RETURN(RXS_WITH); } + +const + { RETURN(SU_CONST); } +simple + { RETURN(SU_SYMBOL); } +stem + { RETURN(SU_SYMBOL_STEM); } +symbol + { RETURN(SU_SYMBOL_COMPOUND); } +str + { RETURN(SU_LITERAL); } +str [bB] / (all\symchr) + { RETURN(SU_LITERAL_BIN); } +str [xX] / (all\symchr) + { RETURN(SU_LITERAL_HEX); } + +eof + { RETURN(SU_EOF); } +any + { RETURN(SU_ERROR); } +*/ +} + +bool StripToken(){ + uchar *cursor = ScanCB.cur; + unsigned depth; + uchar ch; + bool blanks = FALSE; + ScanCB.eot = cursor; +strip: +/*!re2c +"/*" + { + depth = 1; + goto comment; + } +"\r" + { goto strip; } +[ \t] + { + blanks = TRUE; + goto strip; + } +[] / all + { RETURN(blanks); } +*/ + +comment: +/*!re2c +"*/" + { + if(--depth == 0) + goto strip; + else + goto comment; + } +"\n" + { + ++(ScanCB.lineNum); + ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); + goto comment; + } +"/*" + { + ++depth; + goto comment; + } +eof + { RETURN(blanks); } +any + { + goto comment; + } +*/ +} diff --git a/tools/re2c/examples/rexx/scanio.c b/tools/re2c/examples/rexx/scanio.c new file mode 100644 index 0000000..de6898d --- /dev/null +++ b/tools/re2c/examples/rexx/scanio.c @@ -0,0 +1,41 @@ +uchar *ScanFill(uchar *cursor){ + unsigned cnt = s->tok - s->bot; + s->pos += cursor - s->mrk; + if(cnt){ + if(s->eot){ + unsigned len = s->eot - s->tok; + memcpy(s->bot, s->tok, len); + s->eot = &s->bot[len]; + if((len = s->lim - cursor) != 0) + memcpy(s->eot, 
cursor, len); + cursor = s->eot; + s->lim = &cursor[len]; + } else { + memcpy(s->bot, s->tok, s->lim - s->tok); + cursor -= cnt; + s->lim -= cnt; + } + s->tok = s->bot; + s->ptr -= cnt; + } + if((s->top - s->lim) < 512){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar)); + memcpy(buf, s->bot, s->lim - s->bot); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + if(s->eot) + s->eot = &buf[s->eot - s->bot]; + cursor = &buf[cursor - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[512]; + free(s->bot); + s->bot = buf; + } + s->mrk = cursor; + if(ScanCBIO.file){ + if((cnt = read(ScanCBIO.u.f.fd, (char*) s->lim, 512)) != 512) + memset(&s->lim[cnt], 0, 512 - cnt); + s->lim += 512; + } + return cursor; +} diff --git a/tools/re2c/examples/sample.re b/tools/re2c/examples/sample.re new file mode 100644 index 0000000..2f497a3 --- /dev/null +++ b/tools/re2c/examples/sample.re @@ -0,0 +1,7 @@ +/*!re2c + "print" {return PRINT;} + [a-z]+ {return ID;} + [0-9]+ {return DEC;} + "0x" [0-9a-f]+ {return HEX;} + [\000-\377] {return ERR;} +*/ diff --git a/tools/re2c/examples/simple.re b/tools/re2c/examples/simple.re new file mode 100644 index 0000000..5fd8891 --- /dev/null +++ b/tools/re2c/examples/simple.re @@ -0,0 +1,13 @@ +#define NULL ((char*) 0) +char *scan(char *p){ +char *q; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT p +#define YYMARKER q +#define YYFILL(n) +/*!re2c + [0-9]+ {return YYCURSOR;} + [\000-\377] {return NULL;} +*/ +} diff --git a/tools/re2c/globals.h b/tools/re2c/globals.h new file mode 100644 index 0000000..344f3de --- /dev/null +++ b/tools/re2c/globals.h @@ -0,0 +1,26 @@ +#ifndef re2c_globals_h +#define re2c_globals_h + +#include "tools/re2c/basics.h" + +extern const char *fileName; +extern char *outputFileName; +extern int sFlag; +extern int bFlag; +extern int dFlag; +extern int iFlag; +extern int bUsedYYAccept; +extern unsigned int oline; +extern unsigned int maxFill; +extern int vFillIndexes; +extern unsigned char 
*vUsedLabels; +extern unsigned int vUsedLabelAlloc; + +extern unsigned char asc2ebc[256]; +extern unsigned char ebc2asc[256]; + +extern unsigned char *xlat, *talx; + +char *mystrdup(const char *str); + +#endif diff --git a/tools/re2c/ins.h b/tools/re2c/ins.h new file mode 100644 index 0000000..ba6c087 --- /dev/null +++ b/tools/re2c/ins.h @@ -0,0 +1,40 @@ +#ifndef re2c_ins_h +#define re2c_ins_h + +#include "tools/re2c/basics.h" + +#define nChars 256 +typedef unsigned char Char; + +#define CHAR 0 +#define GOTO 1 +#define FORK 2 +#define TERM 3 +#define CTXT 4 + +typedef union Ins { + struct { + byte tag; + byte marked; + void *link; + } i; + struct { + unsigned short value; + unsigned short bump; + void *link; + } c; +} Ins; + +static int isMarked(Ins *i){ + return i->i.marked != 0; +} + +static void mark(Ins *i){ + i->i.marked = 1; +} + +static void unmark(Ins *i){ + i->i.marked = 0; +} + +#endif diff --git a/tools/re2c/main.c b/tools/re2c/main.c new file mode 100644 index 0000000..9484316 --- /dev/null +++ b/tools/re2c/main.c @@ -0,0 +1,196 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "globals.h" +#include "parse.h" +#include "dfa.h" +#include "mbo_getopt.h" + +const char *fileName = 0; +char *outputFileName = 0; +int sFlag = 0; +int bFlag = 0; +int dFlag = 0; +int iFlag = 0; +int bUsedYYAccept = 0; +unsigned int oline = 1; +unsigned int maxFill = 1; +int vFillIndexes = -1; +unsigned char *vUsedLabels; +unsigned int vUsedLabelAlloc = 1000; + +static char *opt_arg = NULL; +static int opt_ind = 1; + +static const mbo_opt_struct OPTIONS[] = { + {'?', 0, "help"}, + {'b', 0, "bit-vectors"}, + {'d', 0, "debug-output"}, + {'e', 0, "ecb"}, + {'f', 0, "storable-state"}, + {'h', 0, "help"}, + {'i', 0, "no-debug-info"}, + {'o', 1, "output"}, + {'s', 0, "nested-ifs"}, + {'v', 0, "version"}, + {'-', 0, NULL} /* end of args */ +}; + +static void usage() +{ + fprintf(stderr, + "usage: re2c [-esbvhd] file\n" + "\n" + "-? 
-h --help Display this info.\n" + "\n" + "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" + " coax better code out of the compiler. Most useful for\n"); + fprintf(stderr, + " specifications with more than a few keywords (e.g. for\n" + " most programming languages).\n" + "\n" + "-e --ecb Cross-compile from an ASCII platform to\n" + " an EBCDIC one.\n" + "\n"); + fprintf(stderr, + "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" + " need this assist to generate better code.\n" + "\n" + "-f --storable-state Generate a scanner with support for storable state\n" + "\n" + "-o --output=output Specify the output file instead of stdout\n" + "\n"); + fprintf(stderr, + "-d --debug-output Creates a parser that dumps information during\n" + " about the current position and in which state the\n" + " parser is.\n" + "\n" + "-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n" + "\n" + "-v --version Show version information.\n" + "-V --vernum Show version as one number.\n"); +} + +char * /* Return a malloc'ed copy of str, including its terminating NUL; the caller owns and frees it. Prints "Out of memory." and exits with status 1 if the allocation fails. */ +mystrdup(const char *str) +{ + size_t len; + char *copy; + + len = strlen(str) + 1; + copy = malloc(len); if (!copy) { fputs("Out of memory.\n", stderr); exit(1); } /* never hand NULL to memcpy or back to the caller */ + memcpy(copy, str, len); + return (copy); +} + +int main(int argc, char *argv[]) +{ + int c; + FILE *f, *output; + + fileName = NULL; + + if(argc == 1) { + usage(); + return 2; + } + + while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0))!=-1) { + switch (c) { + case 'b': + sFlag = 1; + bFlag = 1; + break; + case 'e': + xlat = asc2ebc; + talx = ebc2asc; + break; + case 's': + sFlag = 1; + break; + case 'd': + dFlag = 1; + break; + case 'f': + vFillIndexes = 0; + break; + case 'i': + iFlag = 1; + break; + case 'o': + outputFileName = opt_arg; + break; + case 'v': + fputs("re2c " PACKAGE_VERSION "\n", stdout); + break; + case 'V': { + int v1, v2, v3; + sscanf(PACKAGE_VERSION, "%d.%d.%d", &v1, &v2, &v3); + fprintf(stdout, "%02d%02d%02d\n", v1, v2, v3); + return 2; + } + case 'h': + case '?': + default: + 
usage(); + return 2; + } + } + + if (argc == opt_ind + 1) { + fileName = argv[opt_ind]; + } else { + usage(); + return 2; + } + + vUsedLabels = calloc(vUsedLabelAlloc, 1); + if (!vUsedLabels) { + fputs("Out of memory.\n", stderr); + return 1; + } + + /* set up the input stream */ + if(fileName[0] == '-' && fileName[1] == '\0'){ + fileName = "<stdin>"; + f = stdin; + } else { + if((f = fopen(fileName, "rt")) == NULL){ + fprintf(stderr, "can't open %s\n", fileName); + return 1; + } + } + + /* set up the output stream */ + if (outputFileName == 0 || (fileName[0] == '-' && fileName[1] == '\0')) { + outputFileName = mystrdup("<stdout>"); + output = stdout; + } else { + int len; + char *src, *dst, *tmp; + + output = fopen(outputFileName, "wt"); + if (!output) { + fprintf(stderr, "can't open %s\n", outputFileName); + return 1; + } + + len = strlen(outputFileName); + tmp = (char*)malloc((len+1)*2); + + for (src = outputFileName, dst = tmp; *src; ++src) + { + if (*src == '\\') + *dst++ = *src; + *dst++ = *src; + } + *dst = '\0'; + + outputFileName = tmp; + } + + parse(f, output); + free(outputFileName); + return 0; +} diff --git a/tools/re2c/mbo_getopt.c b/tools/re2c/mbo_getopt.c new file mode 100644 index 0000000..f4553dc --- /dev/null +++ b/tools/re2c/mbo_getopt.c @@ -0,0 +1,194 @@ +/* + Author: Marcus Boerger <helly@users.sourceforge.net> +*/ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include "mbo_getopt.h" +#define OPTERRCOLON (1) +#define OPTERRNF (2) +#define OPTERRARG (3) + +static int mbo_opt_error(int argc, char * const *argv, int oint, int optchr, int err, int show_err) +{ + if (show_err) + { + fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1); + + switch (err) + { + + case OPTERRCOLON: + fprintf(stderr, ": in flags\n"); + break; + + case OPTERRNF: + fprintf(stderr, "option not found %c\n", argv[oint][optchr]); + break; + + case OPTERRARG: + fprintf(stderr, "no argument for option %c\n", 
argv[oint][optchr]); + break; + + default: + fprintf(stderr, "unknown\n"); + break; + } + } + + return ('?'); +} + +int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err) +{ + static int optchr = 0; + static int dash = 0; /* have already seen the - */ + int arg_start = 2; + + int opts_idx = -1; + + if (*optind >= argc) + { + return (EOF); + } + + if (!dash) + { + if ((argv[*optind][0] != '-')) + { + return (EOF); + } + else + { + if (!argv[*optind][1]) + { + /* + * use to specify stdin. Need to let pgm process this and + * the following args + */ + return (EOF); + } + } + } + + if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-')) + { + /* '--' indicates end of args if not followed by a known long option name */ + + while (1) + { + opts_idx++; + + if (opts[opts_idx].opt_char == '-') + { + (*optind)++; + return (EOF); + } + else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name)) + { + break; + } + } + + optchr = 0; + dash = 1; + arg_start = 2 + strlen(opts[opts_idx].opt_name); + } + + if (!dash) + { + dash = 1; + optchr = 1; + } + + /* Check if the guy tries to do a -: kind of flag */ + if (argv[*optind][optchr] == ':') + { + dash = 0; + (*optind)++; + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err)); + } + + if (opts_idx < 0) + { + while (1) + { + opts_idx++; + + if (opts[opts_idx].opt_char == '-') + { + int errind = *optind; + int errchr = optchr; + + if (!argv[*optind][optchr + 1]) + { + dash = 0; + (*optind)++; + } + else + { + optchr++; + } + + return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err)); + } + else if (argv[*optind][optchr] == opts[opts_idx].opt_char) + { + break; + } + } + } + + if (opts[opts_idx].need_param) + { + /* Check for cases where the value of the argument + is in the form -<arg> <val> or in the form -<arg><val> */ + dash = 0; + + if (!argv[*optind][arg_start]) + { + (*optind)++; + + if (*optind 
== argc) + { + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err)); + } + + *optarg = argv[(*optind)++]; + } + else + { + *optarg = &argv[*optind][arg_start]; + (*optind)++; + } + + return opts[opts_idx].opt_char; + } + else + { + if (arg_start == 2) + { + if (!argv[*optind][optchr + 1]) + { + dash = 0; + (*optind)++; + } + else + { + optchr++; + } + } + else + { + (*optind)++; + } + + return opts[opts_idx].opt_char; + } + + assert(0); + return (0); /* never reached */ +} + diff --git a/tools/re2c/mbo_getopt.h b/tools/re2c/mbo_getopt.h new file mode 100644 index 0000000..8f962fd --- /dev/null +++ b/tools/re2c/mbo_getopt.h @@ -0,0 +1,22 @@ +/* + Author: Marcus Boerger <helly@users.sourceforge.net> +*/ + +/* Define structure for one recognized option (both single char and long name). + * If short_open is '-' this is the last option. + */ + +#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ +#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ + +typedef struct mbo_opt_struct +{ + const char opt_char; + const int need_param; + const char * opt_name; +} mbo_opt_struct; + +int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err); + +#endif + diff --git a/tools/re2c/parse.h b/tools/re2c/parse.h new file mode 100644 index 0000000..92077ca --- /dev/null +++ b/tools/re2c/parse.h @@ -0,0 +1,29 @@ +#ifndef re2c_parse_h +#define re2c_parse_h + +#include <stdio.h> +#include "tools/re2c/scanner.h" +#include "tools/re2c/re.h" + +typedef struct Symbol { + struct Symbol *next; + Str name; + RegExp *re; +} Symbol; + +void Symbol_init(Symbol *, const SubStr*); +static Symbol *Symbol_new(const SubStr*); +Symbol *Symbol_find(const SubStr*); + +void line_source(FILE *, unsigned int); +void parse(FILE *, FILE *); + +static Symbol * +Symbol_new(const SubStr *str) +{ + Symbol *r = malloc(sizeof(Symbol)); + Symbol_init(r, str); + return r; +} + +#endif diff --git a/tools/re2c/parser.c b/tools/re2c/parser.c new file mode 100644 
index 0000000..02d5c66 --- /dev/null +++ b/tools/re2c/parser.c @@ -0,0 +1,249 @@ +#include <time.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "tools/re2c/globals.h" +#include "tools/re2c/parse.h" +#include "tools/re2c/parser.h" + +int yylex(void); +static RegExp *parse_expr(void); +static RegExp *parse_diff(void); +static RegExp *parse_term(void); +static RegExp *parse_factor(void); +static RegExp *parse_primary(void); + +static unsigned int accept; +static RegExp *spec; +static Scanner *in; + +static int curtok, peektok; +yystype yylval; +static yystype peekval; + +#define get_next_token() (curtok = yylex()) + +static void +get_peek_token(void) +{ + yystype temp = yylval; /* structure copy */ + if (peektok != NONE) + Scanner_fatal(in, "more than one token of lookahead?"); + peektok = yylex(); + peekval = yylval; /* structure copy */ + yylval = temp; +} + +static void +yyparse(void) +{ + RegExp *re, *look; + + accept = 0; + spec = NULL; + get_next_token(); + while (curtok != 0) { + switch (curtok) { + case ID: + get_peek_token(); + if (peektok == '=') { + /* ID = expr; */ + Symbol *sym = yylval.symbol; + get_next_token(); /* id */ + get_next_token(); /* = */ + re = parse_expr(); + if (curtok != ';') + Scanner_fatal(in, "missing `;' after regexp"); + get_next_token(); /* ; */ + if (sym->re) + Scanner_fatal(in, "sym already defined"); + sym->re = re; + break; + } + /*@fallthrough@*/ + default: + /* rule: expr [/ expr] CODE */ + re = parse_expr(); + if (!re) + Scanner_fatal(in, "expression syntax error"); + + if (curtok == '/') { + get_next_token(); /* / */ + look = parse_expr(); + } else + look = RegExp_new_NullOp(); + + if (curtok != CODE) + Scanner_fatal(in, "missing code after regexp"); + re = RegExp_new_RuleOp(re, look, yylval.token, accept++); + get_next_token(); /* CODE */ + spec = spec ? 
mkAlt(spec, re) : re; + } + } +} + +static RegExp * +parse_expr(void) +{ + RegExp *e, *f; + e = parse_diff(); + while (curtok == '|') { + get_next_token(); /* | */ + f = parse_diff(); + e = mkAlt(e, f); + } + return e; +} + +static RegExp * +parse_diff(void) +{ + RegExp *e, *f; + e = parse_term(); + while (curtok == '\\') { + get_next_token(); /* \ */ + f = parse_term(); + e = mkDiff(e, f); + if(!e) + Scanner_fatal(in, "can only difference char sets"); + } + return e; +} + +static RegExp * +parse_term(void) +{ + RegExp *e, *f; + e = parse_factor(); + while ((f = parse_factor())) { + e = RegExp_new_CatOp(e, f); + } + return e; +} + +static RegExp * +parse_factor(void) +{ + RegExp *e; + char ch; + e = parse_primary(); + while (curtok == CLOSE || curtok == CLOSESIZE) { + switch (curtok) { + case CLOSE: + ch = yylval.op; + while (get_next_token() == CLOSE) { + if (ch != yylval.op) + ch = '*'; + } + switch (ch) { + case '*': + e = mkAlt(RegExp_new_CloseOp(e), RegExp_new_NullOp()); + break; + case '+': + e = RegExp_new_CloseOp(e); + break; + case '?': + e = mkAlt(e, RegExp_new_NullOp()); + break; + } + break; + case CLOSESIZE: + e = RegExp_new_CloseVOp(e, yylval.extop.minsize, + yylval.extop.maxsize); + get_next_token(); /* CLOSESIZE */ + break; + default: + Scanner_fatal(in, "parse error"); + break; + } + } + return e; +} + +static RegExp * +parse_primary(void) +{ + RegExp *e; + switch (curtok) { + case ID: + if (!yylval.symbol->re) + Scanner_fatal(in, "can't find symbol"); + e = yylval.symbol->re; + get_next_token(); + break; + case RANGE: + case STRING: + e = yylval.regexp; + get_next_token(); + break; + case '(': + get_next_token(); + e = parse_expr(); + if (curtok != ')') + Scanner_fatal(in, "missing closing parenthesis"); + get_next_token(); + break; + default: + return NULL; + } + return e; +} + +int +yylex(void) +{ + if (peektok != NONE) { + int tok = peektok; + yylval = peekval; + peektok = NONE; + return tok; + } + return Scanner_scan(in); +} + +void 
line_source(FILE *o, unsigned int line) +{ + char * fnamebuf; + char * token; + + if (iFlag) + return; + fprintf(o, "#line %u \"", line); + if( fileName != NULL ) { + fnamebuf = mystrdup( fileName ); + } else { + fnamebuf = mystrdup( "<stdin>" ); + } + token = strtok( fnamebuf, "\\" ); + for(;;) { + fprintf(o, "%s", token); + token = strtok( NULL, "\\" ); + if( token == NULL ) break; + fputs("\\\\", o); + } + fputs("\"\n", o); oline++; + free( fnamebuf ); +} + +void parse(FILE *i, FILE *o){ + time_t now; + + time(&now); + + peektok = NONE; + + fputs("/* Generated by re2c 0.9.1-C on ", o); + fprintf(o, "%-24s", ctime(&now)); + fputs(" */\n", o); oline+=2; + + in = Scanner_new(i); + + line_source(o, Scanner_line(in)); + + while(Scanner_echo(in, o)){ + yyparse(); + if(spec) + genCode(o, spec); + line_source(o, Scanner_line(in)); + } +} diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h new file mode 100644 index 0000000..c433a99 --- /dev/null +++ b/tools/re2c/parser.h @@ -0,0 +1,33 @@ +#ifndef RE2C_PARSER_H +#define RE2C_PARSER_H + +/* Tokens */ +enum yytokentype { + CLOSESIZE = 258, + CLOSE = 259, + ID = 260, + CODE = 261, + RANGE = 262, + STRING = 263, + NONE = 264 +}; + +#define CLOSESIZE 258 +#define CLOSE 259 +#define ID 260 +#define CODE 261 +#define RANGE 262 +#define STRING 263 +#define NONE 264 + +typedef union { + Symbol *symbol; + RegExp *regexp; + Token *token; + char op; + ExtOp extop; +} yystype; + +extern yystype yylval; + +#endif diff --git a/tools/re2c/re.h b/tools/re2c/re.h new file mode 100644 index 0000000..b45208b --- /dev/null +++ b/tools/re2c/re.h @@ -0,0 +1,191 @@ +#ifndef re2c_re_h +#define re2c_re_h + +#include <stdio.h> +#include "tools/re2c/token.h" +#include "tools/re2c/ins.h" + +typedef struct extop { + char op; + int minsize; + int maxsize; +} ExtOp; + +typedef struct CharPtn { + unsigned int card; + struct CharPtn *fix; + struct CharPtn *nxt; +} CharPtn; + +typedef struct CharSet { + CharPtn *fix; + CharPtn *freeHead, **freeTail; + 
CharPtn *rep[nChars]; + CharPtn ptn[nChars]; +} CharSet; + +typedef struct Range { + struct Range *next; + unsigned int lb, ub; /* [lb,ub) */ +} Range; + +static void +Range_init(Range *r, unsigned int l, unsigned int u) +{ + r->next = NULL; + r->lb = l; + r->ub = u; +} + +static Range * +Range_new(unsigned int l, unsigned int u) +{ + Range *r = malloc(sizeof(Range)); + r->next = NULL; + r->lb = l; + r->ub = u; + return r; +} + +static void +Range_copy(Range *ro, const Range *r) +{ + ro->next = NULL; + ro->lb = r->lb; + ro->ub = r->ub; +} + +static Range * +Range_new_copy(Range *r) +{ + Range *ro = malloc(sizeof(Range)); + ro->next = NULL; + ro->lb = r->lb; + ro->ub = r->ub; + return ro; +} + +void Range_out(FILE *, const Range *); + +typedef enum { + NULLOP = 1, + MATCHOP, + RULEOP, + ALTOP, + CATOP, + CLOSEOP, + CLOSEVOP +} RegExpType; + +typedef struct RegExp { + RegExpType type; + unsigned int size; + union { + /* for MatchOp */ + Range *match; + /* for RuleOp */ + struct { + struct RegExp *exp; + struct RegExp *ctx; + Ins *ins; + unsigned int accept; + Token *code; + unsigned int line; + } RuleOp; + /* for AltOp and CatOp*/ + struct { + struct RegExp *exp1, *exp2; + } AltCatOp; + /* for CloseOp */ + struct RegExp *exp; + /* for CloseVOp*/ + struct { + struct RegExp *exp; + int min; + int max; + } CloseVOp; + } d; +} RegExp; + +static RegExp * +RegExp_isA(RegExp *r, RegExpType t) +{ + return r->type == t ? 
r : NULL; +} + +void RegExp_split(RegExp*, CharSet*); +void RegExp_calcSize(RegExp*, Char*); +unsigned int RegExp_fixedLength(RegExp*); +void RegExp_compile(RegExp*, Char*, Ins*); +void RegExp_display(RegExp*, FILE *); + +static RegExp * +RegExp_new_NullOp(void) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = NULLOP; + return r; +} + +static RegExp * +RegExp_new_MatchOp(Range *m) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = MATCHOP; + r->d.match = m; + return r; +} + +RegExp *RegExp_new_RuleOp(RegExp*, RegExp*, Token*, unsigned int); + +static RegExp * +RegExp_new_AltOp(RegExp *e1, RegExp *e2) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = ALTOP; + r->d.AltCatOp.exp1 = e1; + r->d.AltCatOp.exp2 = e2; + return r; +} + +static RegExp * +RegExp_new_CatOp(RegExp *e1, RegExp *e2) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CATOP; + r->d.AltCatOp.exp1 = e1; + r->d.AltCatOp.exp2 = e2; + return r; +} + +static RegExp * +RegExp_new_CloseOp(RegExp *e) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CLOSEOP; + r->d.exp = e; + return r; +} + +static RegExp * +RegExp_new_CloseVOp(RegExp *e, int lb, int ub) +{ + RegExp *r = malloc(sizeof(RegExp)); + r->type = CLOSEVOP; + r->d.CloseVOp.exp = e; + r->d.CloseVOp.min = lb; + r->d.CloseVOp.max = ub; + return r; +} + +extern void genCode(FILE *, RegExp*); +extern RegExp *mkDiff(RegExp*, RegExp*); +extern RegExp *mkDot(void); +extern RegExp *strToRE(SubStr); +extern RegExp *strToCaseInsensitiveRE(SubStr); +extern RegExp *ranToRE(SubStr); +extern RegExp *invToRE(SubStr); + +extern RegExp *mkAlt(RegExp*, RegExp*); + +#endif diff --git a/tools/re2c/re2c.1 b/tools/re2c/re2c.1 new file mode 100644 index 0000000..d69f94d --- /dev/null +++ b/tools/re2c/re2c.1 @@ -0,0 +1,536 @@ +.ds re \fBre2c\fP +.ds le \fBlex\fP +.ds rx regular expression +.ds lx \fIl\fP-expression +.TH RE2C 1 "8 April 1994" "Version 0.5" +\"$Log: re2c.1,v $ +\"Revision 1.1 2002/04/07 22:27:06 peter +\"Initial revision +\" +\"Revision 1.2 
1994/04/16 15:50:32 peterr +\"Fix bug in simple example. +\" +\"Revision 1.1 1994/04/08 15:39:09 peterr +\"Initial revision +\" +.SH NAME +re2c \- convert regular expressions to C/C++ + +.SH SYNOPSIS +\*(re [\fB-esb\fP] \fIname\fP + +.SH DESCRIPTION +\*(re is a preprocessor that generates C-based recognizers from regular +expressions. +The input to \*(re consists of C/C++ source interleaved with +comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain +scanner specifications. +In the output these comments are replaced with code that, when +executed, will find the next input token and then execute +some user-supplied token-specific code. + +For example, given the following code + +.in +3 +.nf +#define NULL ((char*) 0) +char *scan(char *p){ +char *q; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT p +#define YYMARKER q +#define YYFILL(n) +/*!re2c + [0-9]+ {return YYCURSOR;} + [\\000-\\377] {return NULL;} +*/ +} +.fi +.in -3 + +\*(re will generate + +.in +3 +.nf +/* Generated by re2c on Sat Apr 16 11:40:58 1994 */ +#line 1 "simple.re" +#define NULL ((char*) 0) +char *scan(char *p){ +char *q; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT p +#define YYMARKER q +#define YYFILL(n) +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy0; +yy1: ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '/') goto yy4; + if(yych >= ':') goto yy4; +yy2: yych = *++YYCURSOR; + goto yy7; +yy3: +#line 10 + {return YYCURSOR;} +yy4: yych = *++YYCURSOR; +yy5: +#line 11 + {return NULL;} +yy6: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy7: if(yych <= '/') goto yy3; + if(yych <= '9') goto yy6; + goto yy3; +} +#line 12 + +} +.fi +.in -3 + +.SH OPTIONS +\*(re provides the following options: +.TP +\fB-e\fP +Cross-compile from an ASCII platform to an EBCDIC one. +.TP +\fB-s\fP +Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this +assist to generate better code. 
+.TP +\fB-b\fP +Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better +code out of the compiler. Most useful for specifications with more than a +few keywords (e.g. for most programming languages). + +.SH "INTERFACE CODE" +Unlike other scanner generators, \*(re does not generate complete scanners: +the user must supply some interface code. +In particular, the user must define the following macros: +.TP +\fCYYCTYPE\fP +Type used to hold an input symbol. +Usually \fCchar\fP or \fCunsigned char\fP. +.TP +\fCYYCURSOR\fP +\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol. +The generated code advances \fCYYCURSOR\fP as symbols are matched. +On entry, \fCYYCURSOR\fP is assumed to point to the first character of the +current token. On exit, \fCYYCURSOR\fP will point to the first character of +the following token. +.TP +\fCYYLIMIT\fP +Expression of type \fC*YYCTYPE\fP that marks the end of the buffer +(\fCYYLIMIT[-1]\fP is the last character in the buffer). +The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP +to determine when the buffer needs (re)filling. +.TP +\fCYYMARKER\fP +\*(lx of type \fC*YYCTYPE\fP. +The generated code saves backtracking information in \fCYYMARKER\fP. +.TP +\fCYYFILL(\fP\fIn\fP\fC)\fP +The generated code "calls" \fCYYFILL\fP when the buffer needs +(re)filling: at least \fIn\fP additional characters should +be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and +\fCYYMARKER\fP as needed. Note that for typical programming languages +\fIn\fP will be the length of the longest keyword plus one. + +.SH "SCANNER SPECIFICATIONS" +Each scanner specification consists of a set of \fIrules\fP and name +definitions. +Rules consist of a regular expression along with a block of C/C++ code that +is to be executed when the associated regular expression is matched. +Name definitions are of the form +``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''. 
+ +.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS" +.TP +\fC"foo"\fP +the literal string \fCfoo\fP. +ANSI-C escape sequences can be used. +.TP +\fC[xyz]\fP +a "character class"; in this case, +the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'. +.TP +\fC[abj-oZ]\fP +a "character class" with a range in it; +matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP', +or a '\fCZ\fP'. +.TP +\fIr\fP\fC\e\fP\fIs\fP +match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions +which can be expressed as character classes. +.TP +\fIr\fP\fC*\fP +zero or more \fIr\fP's, where \fIr\fP is any regular expression +.TP +\fIr\fP\fC+\fP +one or more \fIr\fP's +.TP +\fIr\fP\fC?\fP +zero or one \fIr\fP's (that is, "an optional \fIr\fP") +.TP +name +the expansion of the "name" definition (see above) +.TP +\fC(\fP\fIr\fP\fC)\fP +an \fIr\fP; parentheses are used to override precedence +(see below) +.TP +\fIrs\fP +an \fIr\fP followed by an \fIs\fP ("concatenation") +.TP +\fIr\fP\fC|\fP\fIs\fP +either an \fIr\fP or an \fIs\fP +.TP +\fIr\fP\fC/\fP\fIs\fP +an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of +the matched text. This type of \*(rx is called "trailing context". +.LP +The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence. 
+ +.SH "A LARGER EXAMPLE" +.LP +.in +3 +.nf +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; + +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf 
= (uchar*) + malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; +std: + s->tok = cursor; +/*!re2c +any = [\\000-\\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +H = [a-fA-F0-9]; +E = [Ee] [+-]? D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+); +*/ + +/*!re2c + "/*" { goto comment; } + + "auto" { RET(AUTO); } + "break" { RET(BREAK); } + "case" { RET(CASE); } + "char" { RET(CHAR); } + "const" { RET(CONST); } + "continue" { RET(CONTINUE); } + "default" { RET(DEFAULT); } + "do" { RET(DO); } + "double" { RET(DOUBLE); } + "else" { RET(ELSE); } + "enum" { RET(ENUM); } + "extern" { RET(EXTERN); } + "float" { RET(FLOAT); } + "for" { RET(FOR); } + "goto" { RET(GOTO); } + "if" { RET(IF); } + "int" { RET(INT); } + "long" { RET(LONG); } + "register" { RET(REGISTER); } + "return" { RET(RETURN); } + "short" { RET(SHORT); } + "signed" { RET(SIGNED); } + "sizeof" { RET(SIZEOF); } + "static" { RET(STATIC); } + "struct" { RET(STRUCT); } + "switch" { RET(SWITCH); } + "typedef" { RET(TYPEDEF); } + "union" { RET(UNION); } + "unsigned" { RET(UNSIGNED); } + "void" { RET(VOID); } + "volatile" { RET(VOLATILE); } + "while" { RET(WHILE); } + + L (L|D)* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\\[\\n\\\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\\[\\n\\\\"])* ["]) + { RET(SCON); } + + "..." 
{ RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" { RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" { RET('?'); } + + + [ \\t\\v\\f]+ { goto std; } + + "\\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +main(){ + Scanner in; + int t; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while((t = scan(&in)) != EOI){ +/* + printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok); + printf("%d\\n", t); +*/ + } + close(in.fd); +} +.fi +.in -3 + +.SH "SEE ALSO" +.LP +flex(1), lex(1). + +.SH FEATURES +.LP +\*(re does not provide a default action: +the generated code assumes that the input +will consist of a sequence of tokens. +Typically this can be dealt with by adding a rule such as the one for +unexpected characters in the example above. 
+.LP +The user must arrange for a sentinel token to appear at the end of input +(and provide a rule for matching it): +\*(re does not provide an \fC<<EOF>>\fP expression. +If the source is from a null-byte terminated string, a +rule matching a null character will suffice. If the source is from a +file then the approach taken in the example can be used: pad the input with +a newline (or some other character that can't appear within another token); +upon recognizing such a character check to see if it is the sentinel +and act accordingly. +.LP +\*(re does not provide start conditions: use a separate scanner +specification for each start condition (as illustrated in the above example). +.LP +No [^x]. Use difference instead. +.SH BUGS +.LP +Only fixed length trailing context can be handled. +.LP +The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not +provided to the generated code (this value is needed for constructing +the interface code). +Note that this value is usually relatively small: for +typical programming languages \fIn\fP will be the length of the longest +keyword plus one. +.LP +Difference only works for character sets. +.LP +The \*(re internal algorithms need documentation. 
+ +.SH AUTHOR +.LP +Please send bug reports, fixes and feedback to: +.LP +.nf +Peter Bumbulis +Computer Systems Group +University of Waterloo +Waterloo, Ontario +N2L 3G1 +Internet: peterr@csg.uwaterloo.ca +.fi diff --git a/tools/re2c/scanner.c b/tools/re2c/scanner.c new file mode 100644 index 0000000..034c935 --- /dev/null +++ b/tools/re2c/scanner.c @@ -0,0 +1,748 @@ +/* Generated by re2c 0.9.1-C on Sun Oct 9 22:15:58 2005 + */ +#line 1 "scanner.re" +#include <stdlib.h> +#include <string.h> +#include "tools/re2c/scanner.h" +#include "tools/re2c/parse.h" +#include "tools/re2c/globals.h" +#include "tools/re2c/parser.h" + +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + +#define BSIZE 8192 + +#define YYCTYPE unsigned char +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +static unsigned char *fill(Scanner*, unsigned char*); + +void +Scanner_init(Scanner *s, FILE *i) +{ + s->in = i; + s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top = + s->eof = NULL; + s->tchar = s->tline = 0; + s->cline = 1; +} + +static unsigned char * +fill(Scanner *s, unsigned char *cursor) +{ + if(!s->eof){ + unsigned int cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + unsigned char *buf = malloc(((s->lim - s->bot) + BSIZE) + 1); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + if (s->bot) + free(s->bot); + s->bot = buf; + } + if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){ + s->eof = &s->lim[cnt]; *s->eof++ = '\0'; + } + s->lim += cnt; + } + return cursor; +} + +#line 79 "scanner.re" + + +int +Scanner_echo(Scanner *s, FILE 
*out) +{ + unsigned char *cursor = s->cur; + int ignore_eoc = 0; + + /* Catch EOF */ + if (s->eof && cursor == s->eof) + return 0; + + s->tok = cursor; +echo: + +#line 87 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy0; + ++YYCURSOR; +yy0: + if((YYLIMIT - YYCURSOR) < 11) YYFILL(11); + yych = *YYCURSOR; + if(yych <= ')'){ + if(yych <= '\000') goto yy7; + if(yych == '\n') goto yy5; + goto yy9; + } else { + if(yych <= '*') goto yy4; + if(yych != '/') goto yy9; + goto yy2; + } +yy2: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '*') goto yy12; + goto yy3; +yy3: +#line 117 "scanner.re" +{ goto echo; } +#line 112 "scanner.c" +yy4: yych = *++YYCURSOR; + if(yych == '/') goto yy10; + goto yy3; +yy5: yych = *++YYCURSOR; + goto yy6; +yy6: +#line 112 "scanner.re" +{ fwrite(s->tok, 1, cursor - s->tok, out); + s->tok = s->pos = cursor; s->cline++; oline++; + goto echo; } +#line 123 "scanner.c" +yy7: yych = *++YYCURSOR; + goto yy8; +yy8: +#line 115 "scanner.re" +{ fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */ + if(cursor == s->eof) { RETURN(0); } } +#line 130 "scanner.c" +yy9: yych = *++YYCURSOR; + goto yy3; +yy10: yych = *++YYCURSOR; + goto yy11; +yy11: +#line 103 "scanner.re" +{ + if (ignore_eoc) { + ignore_eoc = 0; + } else { + fwrite(s->tok, 1, cursor - s->tok, out); + } + s->tok = s->pos = cursor; + goto echo; + } +#line 146 "scanner.c" +yy12: yych = *++YYCURSOR; + if(yych == '!') goto yy14; + goto yy13; +yy13: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy3; + } +yy14: yych = *++YYCURSOR; + if(yych == 'm') goto yy15; + if(yych == 'r') goto yy16; + goto yy13; +yy15: yych = *++YYCURSOR; + if(yych == 'a') goto yy21; + goto yy13; +yy16: yych = *++YYCURSOR; + if(yych != 'e') goto yy13; + goto yy17; +yy17: yych = *++YYCURSOR; + if(yych != '2') goto yy13; + goto yy18; +yy18: yych = *++YYCURSOR; + if(yych != 'c') goto yy13; + goto yy19; +yy19: yych = *++YYCURSOR; + goto yy20; +yy20: +#line 94 
"scanner.re" +{ fwrite(s->tok, 1, &cursor[-7] - s->tok, out); + s->tok = cursor; + RETURN(1); } +#line 177 "scanner.c" +yy21: yych = *++YYCURSOR; + if(yych != 'x') goto yy13; + goto yy22; +yy22: yych = *++YYCURSOR; + if(yych != ':') goto yy13; + goto yy23; +yy23: yych = *++YYCURSOR; + if(yych != 'r') goto yy13; + goto yy24; +yy24: yych = *++YYCURSOR; + if(yych != 'e') goto yy13; + goto yy25; +yy25: yych = *++YYCURSOR; + if(yych != '2') goto yy13; + goto yy26; +yy26: yych = *++YYCURSOR; + if(yych != 'c') goto yy13; + goto yy27; +yy27: yych = *++YYCURSOR; + goto yy28; +yy28: +#line 97 "scanner.re" +{ + fprintf(out, "#define YYMAXFILL %u\n", maxFill); + s->tok = s->pos = cursor; + ignore_eoc = 1; + goto echo; + } +#line 206 "scanner.c" +} +#line 118 "scanner.re" + +} + + +int +Scanner_scan(Scanner *s) +{ + unsigned char *cursor = s->cur; + unsigned int depth; + +scan: + s->tchar = cursor - s->pos; + s->tline = s->cline; + s->tok = cursor; + +#line 224 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy29; + ++YYCURSOR; +yy29: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '/'){ + if(yych <= '"'){ + if(yych <= '\n'){ + if(yych <= '\b') goto yy53; + if(yych <= '\t') goto yy47; + goto yy49; + } else { + if(yych == ' ') goto yy47; + if(yych <= '!') goto yy53; + goto yy37; + } + } else { + if(yych <= '*'){ + if(yych <= '&') goto yy53; + if(yych <= '\'') goto yy39; + if(yych <= ')') goto yy43; + goto yy35; + } else { + if(yych <= '+') goto yy44; + if(yych <= '-') goto yy53; + if(yych <= '.') goto yy51; + goto yy33; + } + } + } else { + if(yych <= '@'){ + if(yych <= '<'){ + if(yych == ';') goto yy43; + goto yy53; + } else { + if(yych <= '=') goto yy43; + if(yych == '?') goto yy44; + goto yy53; + } + } else { + if(yych <= '`'){ + if(yych <= 'Z') goto yy45; + if(yych <= '[') goto yy41; + if(yych <= '\\') goto yy43; + goto yy53; + } else { + if(yych <= 'z') goto yy45; + if(yych <= '{') goto yy31; + if(yych <= '|') goto yy43; + goto 
yy53; + } + } + } +yy31: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych <= '/') goto yy32; + if(yych <= '9') goto yy84; + goto yy32; +yy32: +#line 133 "scanner.re" +{ depth = 1; + goto code; + } +#line 291 "scanner.c" +yy33: yych = *++YYCURSOR; + if(yych == '*') goto yy82; + goto yy34; +yy34: +#line 163 "scanner.re" +{ RETURN(*s->tok); } +#line 298 "scanner.c" +yy35: yych = *++YYCURSOR; + if(yych == '/') goto yy80; + goto yy36; +yy36: +#line 165 "scanner.re" +{ yylval.op = *s->tok; + RETURN(CLOSE); } +#line 306 "scanner.c" +yy37: yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + if(yych != '\n') goto yy76; + goto yy38; +yy38: +#line 150 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing \")"); } +#line 314 "scanner.c" +yy39: yyaccept = 2; + yych = *(YYMARKER = ++YYCURSOR); + if(yych != '\n') goto yy71; + goto yy40; +yy40: +#line 151 "scanner.re" +{ Scanner_fatal(s, "unterminated string constant (missing ')"); } +#line 322 "scanner.c" +yy41: yyaccept = 3; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy42; + if(yych == '^') goto yy62; + goto yy60; +yy42: +#line 161 "scanner.re" +{ Scanner_fatal(s, "unterminated range (missing ])"); } +#line 331 "scanner.c" +yy43: yych = *++YYCURSOR; + goto yy34; +yy44: yych = *++YYCURSOR; + goto yy36; +yy45: yych = *++YYCURSOR; + goto yy58; +yy46: +#line 180 "scanner.re" +{ SubStr substr; + s->cur = cursor; + substr = Scanner_token(s); + yylval.symbol = Symbol_find(&substr); + return ID; } +#line 345 "scanner.c" +yy47: yych = *++YYCURSOR; + goto yy56; +yy48: +#line 186 "scanner.re" +{ goto scan; } +#line 351 "scanner.c" +yy49: yych = *++YYCURSOR; + goto yy50; +yy50: +#line 188 "scanner.re" +{ if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->cline++; + goto scan; + } +#line 360 "scanner.c" +yy51: yych = *++YYCURSOR; + goto yy52; +yy52: +#line 193 "scanner.re" +{ s->cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } +#line 369 "scanner.c" +yy53: yych = *++YYCURSOR; + goto 
yy54; +yy54: +#line 198 "scanner.re" +{ fprintf(stderr, "unexpected character: '%c'\n", *s->tok); + goto scan; + } +#line 377 "scanner.c" +yy55: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy56; +yy56: if(yych == '\t') goto yy55; + if(yych == ' ') goto yy55; + goto yy48; +yy57: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy58; +yy58: if(yych <= '@'){ + if(yych <= '/') goto yy46; + if(yych <= '9') goto yy57; + goto yy46; + } else { + if(yych <= 'Z') goto yy57; + if(yych <= '`') goto yy46; + if(yych <= 'z') goto yy57; + goto yy46; + } +yy59: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy60; +yy60: if(yych <= '['){ + if(yych != '\n') goto yy59; + goto yy61; + } else { + if(yych <= '\\') goto yy64; + if(yych <= ']') goto yy65; + goto yy59; + } +yy61: YYCURSOR = YYMARKER; + switch(yyaccept){ + case 0: goto yy32; + case 1: goto yy38; + case 2: goto yy40; + case 3: goto yy42; + } +yy62: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy63; +yy63: if(yych <= '['){ + if(yych == '\n') goto yy61; + goto yy62; + } else { + if(yych <= '\\') goto yy67; + if(yych <= ']') goto yy68; + goto yy62; + } +yy64: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy59; +yy65: yych = *++YYCURSOR; + goto yy66; +yy66: +#line 157 "scanner.re" +{ s->cur = cursor; + yylval.regexp = ranToRE(Scanner_token(s)); + return RANGE; } +#line 442 "scanner.c" +yy67: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy62; +yy68: yych = *++YYCURSOR; + goto yy69; +yy69: +#line 153 "scanner.re" +{ s->cur = cursor; + yylval.regexp = invToRE(Scanner_token(s)); + return RANGE; } +#line 455 "scanner.c" +yy70: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy71; +yy71: if(yych <= '&'){ + if(yych == '\n') goto yy61; + goto yy70; + } else { + if(yych <= 
'\'') goto yy73; + if(yych != '\\') goto yy70; + goto yy72; + } +yy72: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy70; +yy73: yych = *++YYCURSOR; + goto yy74; +yy74: +#line 146 "scanner.re" +{ s->cur = cursor; + yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s)); + return STRING; } +#line 480 "scanner.c" +yy75: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy76; +yy76: if(yych <= '!'){ + if(yych == '\n') goto yy61; + goto yy75; + } else { + if(yych <= '"') goto yy78; + if(yych != '\\') goto yy75; + goto yy77; + } +yy77: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy61; + goto yy75; +yy78: yych = *++YYCURSOR; + goto yy79; +yy79: +#line 142 "scanner.re" +{ s->cur = cursor; + yylval.regexp = strToRE(Scanner_token(s)); + return STRING; } +#line 505 "scanner.c" +yy80: yych = *++YYCURSOR; + goto yy81; +yy81: +#line 139 "scanner.re" +{ s->tok = cursor; + RETURN(0); } +#line 512 "scanner.c" +yy82: yych = *++YYCURSOR; + goto yy83; +yy83: +#line 136 "scanner.re" +{ depth = 1; + goto comment; } +#line 519 "scanner.c" +yy84: ++YYCURSOR; + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + goto yy85; +yy85: if(yych <= '/'){ + if(yych == ',') goto yy88; + goto yy61; + } else { + if(yych <= '9') goto yy84; + if(yych != '}') goto yy61; + goto yy86; + } +yy86: yych = *++YYCURSOR; + goto yy87; +yy87: +#line 168 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = atoi((char *)s->tok+1); + RETURN(CLOSESIZE); } +#line 539 "scanner.c" +yy88: yych = *++YYCURSOR; + if(yych != '}') goto yy92; + goto yy89; +yy89: yych = *++YYCURSOR; + goto yy90; +yy90: +#line 176 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = -1; + RETURN(CLOSESIZE); } +#line 550 "scanner.c" +yy91: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy92; +yy92: 
if(yych <= '/') goto yy61; + if(yych <= '9') goto yy91; + if(yych != '}') goto yy61; + goto yy93; +yy93: yych = *++YYCURSOR; + goto yy94; +yy94: +#line 172 "scanner.re" +{ yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1)); + RETURN(CLOSESIZE); } +#line 566 "scanner.c" +} +#line 201 "scanner.re" + + +code: + +#line 573 "scanner.c" +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy95; + ++YYCURSOR; +yy95: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= '&'){ + if(yych <= '\n'){ + if(yych <= '\t') goto yy103; + goto yy101; + } else { + if(yych == '"') goto yy105; + goto yy103; + } + } else { + if(yych <= '{'){ + if(yych <= '\'') goto yy106; + if(yych <= 'z') goto yy103; + goto yy99; + } else { + if(yych != '}') goto yy103; + goto yy97; + } + } +yy97: yych = *++YYCURSOR; + goto yy98; +yy98: +#line 205 "scanner.re" +{ if(--depth == 0){ + s->cur = cursor; + yylval.token = Token_new(Scanner_token(s), s->tline); + return CODE; + } + goto code; } +#line 610 "scanner.c" +yy99: yych = *++YYCURSOR; + goto yy100; +yy100: +#line 211 "scanner.re" +{ ++depth; + goto code; } +#line 617 "scanner.c" +yy101: yych = *++YYCURSOR; + goto yy102; +yy102: +#line 213 "scanner.re" +{ if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); + s->pos = cursor; s->cline++; + goto code; + } +#line 626 "scanner.c" +yy103: yych = *++YYCURSOR; + goto yy104; +yy104: +#line 217 "scanner.re" +{ goto code; } +#line 632 "scanner.c" +yy105: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy104; + goto yy112; +yy106: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == '\n') goto yy104; + goto yy108; +yy107: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy108; +yy108: if(yych <= '&'){ + if(yych != '\n') goto yy107; + goto yy109; + } else { + if(yych <= '\'') goto yy103; + if(yych == '\\') goto yy110; + goto yy107; + } +yy109: YYCURSOR 
= YYMARKER; + switch(yyaccept){ + case 0: goto yy104; + } +yy110: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy109; + goto yy107; +yy111: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + goto yy112; +yy112: if(yych <= '!'){ + if(yych == '\n') goto yy109; + goto yy111; + } else { + if(yych <= '"') goto yy103; + if(yych != '\\') goto yy111; + goto yy113; + } +yy113: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych == '\n') goto yy109; + goto yy111; +} +#line 218 "scanner.re" + + +comment: + +#line 685 "scanner.c" +{ + YYCTYPE yych; + goto yy114; + ++YYCURSOR; +yy114: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + if(yych <= ')'){ + if(yych == '\n') goto yy119; + goto yy121; + } else { + if(yych <= '*') goto yy116; + if(yych == '/') goto yy118; + goto yy121; + } +yy116: yych = *++YYCURSOR; + if(yych == '/') goto yy124; + goto yy117; +yy117: +#line 232 "scanner.re" +{ goto comment; } +#line 707 "scanner.c" +yy118: yych = *++YYCURSOR; + if(yych == '*') goto yy122; + goto yy117; +yy119: yych = *++YYCURSOR; + goto yy120; +yy120: +#line 228 "scanner.re" +{ if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->cline++; + goto comment; + } +#line 719 "scanner.c" +yy121: yych = *++YYCURSOR; + goto yy117; +yy122: yych = *++YYCURSOR; + goto yy123; +yy123: +#line 226 "scanner.re" +{ ++depth; + goto comment; } +#line 728 "scanner.c" +yy124: yych = *++YYCURSOR; + goto yy125; +yy125: +#line 222 "scanner.re" +{ if(--depth == 0) + goto scan; + else + goto comment; } +#line 737 "scanner.c" +} +#line 233 "scanner.re" + +} + +void +Scanner_fatal(Scanner *s, const char *msg) +{ + fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg); + exit(1); +} diff --git a/tools/re2c/scanner.h b/tools/re2c/scanner.h new file mode 100644 index 0000000..a5720b7 --- /dev/null +++ b/tools/re2c/scanner.h @@ -0,0 +1,44 @@ +#ifndef _scanner_h +#define _scanner_h 
+ +#include <stdio.h> +#include "tools/re2c/token.h" + +typedef struct Scanner { + FILE *in; + unsigned char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + unsigned int tchar, tline, cline; +} Scanner; + +void Scanner_init(Scanner*, FILE *); +static Scanner *Scanner_new(FILE *); + +int Scanner_echo(Scanner*, FILE *); +int Scanner_scan(Scanner*); +void Scanner_fatal(Scanner*, const char*); +static SubStr Scanner_token(Scanner*); +static unsigned int Scanner_line(Scanner*); + +static SubStr +Scanner_token(Scanner *s) +{ + SubStr r; + SubStr_init_u(&r, s->tok, s->cur - s->tok); + return r; +} + +static unsigned int +Scanner_line(Scanner *s) +{ + return s->cline; +} + +static Scanner * +Scanner_new(FILE *i) +{ + Scanner *r = malloc(sizeof(Scanner)); + Scanner_init(r, i); + return r; +} + +#endif diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re new file mode 100644 index 0000000..423835b --- /dev/null +++ b/tools/re2c/scanner.re @@ -0,0 +1,241 @@ +#include <stdlib.h> +#include <string.h> +#include "tools/re2c/scanner.h" +#include "tools/re2c/parse.h" +#include "tools/re2c/globals.h" +#include "tools/re2c/parser.h" + +#ifndef MAX +#define MAX(a,b) (((a)>(b))?(a):(b)) +#endif + +#define BSIZE 8192 + +#define YYCTYPE unsigned char +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +static unsigned char *fill(Scanner*, unsigned char*); + +void +Scanner_init(Scanner *s, FILE *i) +{ + s->in = i; + s->bot = s->tok = s->ptr = s->cur = s->pos = s->lim = s->top = + s->eof = NULL; + s->tchar = s->tline = 0; + s->cline = 1; +} + +static unsigned char * +fill(Scanner *s, unsigned char *cursor) +{ + if(!s->eof){ + unsigned int cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + unsigned char *buf = 
malloc(((s->lim - s->bot) + BSIZE) + 1); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + if (s->bot) + free(s->bot); + s->bot = buf; + } + if((cnt = fread(s->lim, 1, BSIZE, s->in)) != BSIZE){ + s->eof = &s->lim[cnt]; *s->eof++ = '\0'; + } + s->lim += cnt; + } + return cursor; +} + +/*!re2c +zero = "\000"; +any = [\000-\377]; +dot = any \ [\n]; +esc = dot \ [\\]; +istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ; +cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ; +dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\""; +sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ; +letter = [a-zA-Z]; +digit = [0-9]; +*/ + +int +Scanner_echo(Scanner *s, FILE *out) +{ + unsigned char *cursor = s->cur; + int ignore_eoc = 0; + + /* Catch EOF */ + if (s->eof && cursor == s->eof) + return 0; + + s->tok = cursor; +echo: +/*!re2c + "/*!re2c" { fwrite(s->tok, 1, &cursor[-7] - s->tok, out); + s->tok = cursor; + RETURN(1); } + "/*!max:re2c" { + fprintf(out, "#define YYMAXFILL %u\n", maxFill); + s->tok = s->pos = cursor; + ignore_eoc = 1; + goto echo; + } + "*" "/" { + if (ignore_eoc) { + ignore_eoc = 0; + } else { + fwrite(s->tok, 1, cursor - s->tok, out); + } + s->tok = s->pos = cursor; + goto echo; + } + "\n" { fwrite(s->tok, 1, cursor - s->tok, out); + s->tok = s->pos = cursor; s->cline++; oline++; + goto echo; } + zero { fwrite(s->tok, 1, cursor - s->tok - 1, out); /* -1 so we don't write out the \0 */ + if(cursor == s->eof) { RETURN(0); } } + any { goto echo; } +*/ +} + + +int +Scanner_scan(Scanner *s) +{ + unsigned char *cursor = s->cur; + unsigned int depth; + +scan: + s->tchar = cursor - s->pos; + s->tline = s->cline; + s->tok = cursor; +/*!re2c + "{" { depth = 1; + goto code; + } + "/*" { depth = 1; + goto comment; } + + "*/" { s->tok = cursor; + RETURN(0); } + + dstring { s->cur = cursor; + yylval.regexp = 
strToRE(Scanner_token(s)); + return STRING; } + + sstring { s->cur = cursor; + yylval.regexp = strToCaseInsensitiveRE(Scanner_token(s)); + return STRING; } + + "\"" { Scanner_fatal(s, "unterminated string constant (missing \")"); } + "'" { Scanner_fatal(s, "unterminated string constant (missing ')"); } + + istring { s->cur = cursor; + yylval.regexp = invToRE(Scanner_token(s)); + return RANGE; } + + cstring { s->cur = cursor; + yylval.regexp = ranToRE(Scanner_token(s)); + return RANGE; } + + "[" { Scanner_fatal(s, "unterminated range (missing ])"); } + + [()|=;/\\] { RETURN(*s->tok); } + + [*+?] { yylval.op = *s->tok; + RETURN(CLOSE); } + + "{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = atoi((char *)s->tok+1); + RETURN(CLOSESIZE); } + + "{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)s->tok, ',')+1)); + RETURN(CLOSESIZE); } + + "{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)s->tok+1); + yylval.extop.maxsize = -1; + RETURN(CLOSESIZE); } + + letter (letter|digit)* { SubStr substr; + s->cur = cursor; + substr = Scanner_token(s); + yylval.symbol = Symbol_find(&substr); + return ID; } + + [ \t]+ { goto scan; } + + "\n" { if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->cline++; + goto scan; + } + + "." 
{ s->cur = cursor; + yylval.regexp = mkDot(); + return RANGE; + } + + any { fprintf(stderr, "unexpected character: '%c'\n", *s->tok); + goto scan; + } +*/ + +code: +/*!re2c + "}" { if(--depth == 0){ + s->cur = cursor; + yylval.token = Token_new(Scanner_token(s), s->tline); + return CODE; + } + goto code; } + "{" { ++depth; + goto code; } + "\n" { if(cursor == s->eof) Scanner_fatal(s, "missing '}'"); + s->pos = cursor; s->cline++; + goto code; + } + dstring | sstring | any { goto code; } +*/ + +comment: +/*!re2c + "*/" { if(--depth == 0) + goto scan; + else + goto comment; } + "/*" { ++depth; + goto comment; } + "\n" { if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->cline++; + goto comment; + } + any { goto comment; } +*/ +} + +void +Scanner_fatal(Scanner *s, const char *msg) +{ + fprintf(stderr, "line %d, column %d: %s\n", s->tline, s->tchar + 1, msg); + exit(1); +} diff --git a/tools/re2c/substr.c b/tools/re2c/substr.c new file mode 100644 index 0000000..c750fb9 --- /dev/null +++ b/tools/re2c/substr.c @@ -0,0 +1,65 @@ +#include <string.h> +#include "tools/re2c/substr.h" +#include "tools/re2c/globals.h" + +void +SubStr_out(const SubStr *s, FILE *o) +{ + unsigned int i; + fwrite(s->str, s->len, 1, o); + for (i=0; i<s->len; i++) + if (s->str[i] == '\n') + oline++; +} + +int +SubStr_eq(const SubStr *s1, const SubStr *s2) +{ + return (s1->len == s2->len && memcmp(s1->str, s2->str, s1->len) == 0); +} + +void +Str_init(Str *r, const SubStr* s) +{ + SubStr_init(r, malloc(sizeof(char)*s->len), s->len); + memcpy(r->str, s->str, s->len); +} + +Str * +Str_new(const SubStr* s) +{ + Str *r = SubStr_new(malloc(sizeof(char)*s->len), s->len); + memcpy(r->str, s->str, s->len); + return r; +} + +void +Str_copy(Str *r, Str* s) +{ + SubStr_init(r, s->str, s->len); + s->str = NULL; + s->len = 0; +} + +Str * +Str_new_copy(Str* s) +{ + Str *r = SubStr_new(s->str, s->len); + s->str = NULL; + s->len = 0; + return r; +} + +Str * +Str_new_empty(void) +{ + return 
SubStr_new(NULL, 0); +} + + +void Str_delete(Str *s) { + free(s->str); + s->str = (char*)-1; + s->len = (unsigned int)-1; + free(s); +} diff --git a/tools/re2c/substr.h b/tools/re2c/substr.h new file mode 100644 index 0000000..0a19b93 --- /dev/null +++ b/tools/re2c/substr.h @@ -0,0 +1,89 @@ +#ifndef re2c_substr_h +#define re2c_substr_h + +#include <stdio.h> +#include <stdlib.h> +#include "tools/re2c/basics.h" + +struct SubStr { + char *str; + unsigned int len; +}; + +typedef struct SubStr SubStr; + +int SubStr_eq(const SubStr *, const SubStr *); + +static void SubStr_init_u(SubStr*, unsigned char*, unsigned int); +static SubStr *SubStr_new_u(unsigned char*, unsigned int); + +static void SubStr_init(SubStr*, char*, unsigned int); +static SubStr *SubStr_new(char*, unsigned int); + +static void SubStr_copy(SubStr*, const SubStr*); +static SubStr *SubStr_new_copy(const SubStr*); + +void SubStr_out(const SubStr*, FILE *); +#define SubStr_delete(x) free(x) + +typedef struct SubStr Str; + +void Str_init(Str*, const SubStr*); +Str *Str_new(const SubStr*); + +void Str_copy(Str*, Str*); +Str *Str_new_copy(Str*); + +Str *Str_new_empty(void); +void Str_destroy(Str *); +void Str_delete(Str *); + +static void +SubStr_init_u(SubStr *r, unsigned char *s, unsigned int l) +{ + r->str = (char*)s; + r->len = l; +} + +static SubStr * +SubStr_new_u(unsigned char *s, unsigned int l) +{ + SubStr *r = malloc(sizeof(SubStr)); + r->str = (char*)s; + r->len = l; + return r; +} + +static void +SubStr_init(SubStr *r, char *s, unsigned int l) +{ + r->str = s; + r->len = l; +} + +static SubStr * +SubStr_new(char *s, unsigned int l) +{ + SubStr *r = malloc(sizeof(SubStr)); + r->str = s; + r->len = l; + return r; +} + +static void +SubStr_copy(SubStr *r, const SubStr *s) +{ + r->str = s->str; + r->len = s->len; +} + +static SubStr * +SubStr_new_copy(const SubStr *s) +{ + SubStr *r = malloc(sizeof(SubStr)); + r->str = s->str; + r->len = s->len; + return r; +} + +#endif diff --git 
a/tools/re2c/token.h b/tools/re2c/token.h new file mode 100644 index 0000000..d50a46d --- /dev/null +++ b/tools/re2c/token.h @@ -0,0 +1,30 @@ +#ifndef re2c_token_h +#define re2c_token_h + +#include "substr.h" + +typedef struct Token { + Str text; + unsigned int line; +} Token; + +static void Token_init(Token *, SubStr, unsigned int); +static Token *Token_new(SubStr, unsigned int); + +static void +Token_init(Token *r, SubStr t, unsigned int l) +{ + Str_copy(&r->text, &t); + r->line = l; +} + +static Token * +Token_new(SubStr t, unsigned int l) +{ + Token *r = malloc(sizeof(Token)); + Str_init(&r->text, &t); + r->line = l; + return r; +} + +#endif diff --git a/tools/re2c/translate.c b/tools/re2c/translate.c new file mode 100644 index 0000000..7ba173e --- /dev/null +++ b/tools/re2c/translate.c @@ -0,0 +1,61 @@ +#include "tools/re2c/globals.h" + +unsigned char asc2asc[256] = { +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, +0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, +0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, +0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 
/*
 * 256-entry byte translation table, one output byte per possible input byte.
 * Going by its name and the note below, it is indexed by an EBCDIC
 * (Code Page 37) code and yields the corresponding ISO 8859/1 code; for
 * example entry 0x40 is 0x20 (space), entry 0xc1 is 0x41 ('A') and entry
 * 0xf0 is 0x30 ('0').
 */
unsigned char ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */
0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f,
0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07,
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a,
0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c,
0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0xac,
0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f,
0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22,
0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xde,0xb1,
0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4,
0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0xdd,0xfe,0xae,
0x5e,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0x5b,0x5d,0xaf,0xa8,0xb4,0xd7,
0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5,
0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff,
0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f
};