/* Parser simulator for unifying counterexample search Copyright (C) 2020-2021 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include "parse-simulation.h" #include #include #include #include "lssi.h" #include "nullable.h" struct parse_state { // Path of state-items the parser has traversed. struct si_chunk { // Elements newly added in this chunk. state_item_list contents; // Properties of the linked list this chunk represents. const state_item *head_elt; const state_item *tail_elt; size_t total_size; } state_items; // List of derivations of the symbols. struct deriv_chunk { derivation_list contents; const derivation *head_elt; const derivation *tail_elt; size_t total_size; } derivs; struct parse_state *parent; int reference_count; // Incremented during productions, decremented during reductions. int depth; // Whether the contents of the chunks should be prepended or // appended to the list the chunks represent. bool prepend; // Causes chunk contents to be freed when the reference count is // one. Used when only the chunk metadata will be needed. bool free_contents_early; }; static void ps_si_prepend (parse_state *ps, const state_item *si) { struct si_chunk *sic = &ps->state_items; gl_list_add_first (sic->contents, si); sic->head_elt = si; ++sic->total_size; if (!sic->tail_elt) sic->tail_elt = si; } static void ps_si_append (parse_state *ps, const state_item *si) { struct si_chunk *sic = &ps->state_items; gl_list_add_last (sic->contents, si); sic->tail_elt = si; ++sic->total_size; if (!sic->head_elt) sic->head_elt = si; } static void ps_derivs_prepend (parse_state *ps, derivation *d) { struct deriv_chunk *dc = &ps->derivs; derivation_list_prepend (dc->contents, d); dc->head_elt = d; ++dc->total_size; if (!dc->tail_elt) dc->tail_elt = d; } static void ps_derivs_append (parse_state *ps, derivation *d) { struct deriv_chunk *dc = &ps->derivs; derivation_list_append (dc->contents, d); dc->tail_elt = d; ++dc->total_size; if (!dc->head_elt) dc->head_elt = d; } static int allocs = 0; static int frees = 0; static parse_state * empty_parse_state (void) { parse_state *res = xcalloc (1, sizeof *res); res->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true); res->derivs.contents = derivation_list_new (); ++allocs; return res; } parse_state * new_parse_state (const state_item *si) { parse_state *res = empty_parse_state (); ps_si_append (res, si); ps_derivs_append (res, derivation_dot ()); return res; } static parse_state * copy_parse_state (bool prepend, parse_state *parent) { parse_state *res = xmalloc (sizeof *res); *res = *parent; res->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true); res->derivs.contents = derivation_list_new (); res->parent = parent; res->prepend = prepend; res->reference_count = 0; res->free_contents_early = false; parse_state_retain (parent); ++allocs; return res; } bool parse_state_derivation_completed (const parse_state *ps) { return ps->derivs.total_size == 1; } derivation * parse_state_derivation (const parse_state *ps) { return (derivation *) ps->derivs.head_elt; } const state_item * parse_state_head (const parse_state *ps) { return ps->state_items.head_elt; } const state_item * parse_state_tail (const parse_state *ps) { return ps->state_items.tail_elt; } int parse_state_length (const parse_state *ps) { return ps->state_items.total_size; } int parse_state_depth (const parse_state *ps) { return ps->depth; } void parse_state_retain (parse_state *ps) { ++ps->reference_count; } void parse_state_free_contents_early (parse_state *ps) { ps->free_contents_early = true; } void free_parse_state (parse_state *original_ps) { bool free_contents = true; parse_state *parent_ps = NULL; for (parse_state *ps = original_ps; ps && free_contents; ps = parent_ps) { --ps->reference_count; free_contents = (ps->reference_count == 1 && ps->free_contents_early) || (ps->reference_count == 0 && !ps->free_contents_early); // need to keep the parse state around for visited hash set, // but its contents and parent can be freed if (free_contents) { if (ps->state_items.contents) gl_list_free (ps->state_items.contents); if (ps->derivs.contents) derivation_list_free (ps->derivs.contents); } parent_ps = ps->parent; if (ps->reference_count <= 0) { free (ps); ++frees; } } } size_t parse_state_hasher (const parse_state *ps, size_t max) { const struct si_chunk *sis = &ps->state_items; return ((state_item *) sis->head_elt - state_items + (state_item *) sis->tail_elt - state_items + sis->total_size) % max; } bool parse_state_comparator (const parse_state *ps1, const parse_state *ps2) { const struct si_chunk *sis1 = &ps1->state_items; const struct si_chunk *sis2 = &ps2->state_items; return sis1->head_elt == sis2->head_elt && sis1->tail_elt == sis2->tail_elt && sis1->total_size == sis2->total_size; } void parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions) { // traverse to the root parse_state, // which will have a list of all completed productions. const parse_state *root_ps = ps; while (root_ps->parent) root_ps = root_ps->parent; state_item_list sis = root_ps->state_items.contents; int count = 0; state_item *last = NULL; state_item *next = NULL; for (gl_list_iterator_t it = gl_list_iterator (sis); state_item_list_next (&it, &next); ) { if (last && last->state == next->state) ++count; last = next; } *productions = count; *shifts = root_ps->state_items.total_size - count; } typedef void (*chunk_append_fn) (gl_list_t, const void *); // A version of gl_list_add_last which has the chunk_append_fn // signature. static void list_add_last (gl_list_t list, const void *elt) { gl_list_add_last (list, elt); } // takes an array of n gl_lists and flattens them into two list // based off of the index split static void list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n, chunk_append_fn append_fn) { int ret_index = 0; int ret_array = 0; for (int i = 0; i < n; ++i) { const void *p = NULL; gl_list_iterator_t it = gl_list_iterator (list[i]); while (gl_list_iterator_next (&it, &p, NULL)) if (p) { gl_list_t l = (gl_list_t) p; const void *si = NULL; gl_list_iterator_t it2 = gl_list_iterator (l); while (gl_list_iterator_next (&it2, &si, NULL)) { if (ret_index++ == split) ++ret_array; if (rets[ret_array]) append_fn (rets[ret_array], si); } gl_list_iterator_free (&it2); } gl_list_iterator_free (&it); } } static parse_state_list parse_state_list_new (void) { return gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, (gl_listelement_dispose_fn)free_parse_state, true); } static void parse_state_list_append (parse_state_list pl, parse_state *ps) { parse_state_retain (ps); gl_list_add_last (pl, ps); } // Emulates a reduction on a parse state by popping some amount of // derivations and state_items off of the parse_state and returning // the result in ret. Returns the derivation of what's popped. static derivation_list parser_pop (parse_state *ps, int deriv_index, int si_index, parse_state *ret) { // prepend sis, append sis, prepend derivs, append derivs gl_list_t chunks[4]; for (int i = 0; i < 4; ++i) chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true); for (parse_state *pn = ps; pn != NULL; pn = pn->parent) if (pn->prepend) { gl_list_add_last (chunks[0], pn->state_items.contents); gl_list_add_last (chunks[2], pn->derivs.contents); } else { gl_list_add_first (chunks[1], pn->state_items.contents); gl_list_add_first (chunks[3], pn->derivs.contents); } derivation_list popped_derivs = derivation_list_new (); gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL, ret->derivs.contents, popped_derivs }; list_flatten_and_split (chunks, ret_chunks, si_index, 2, list_add_last); list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2, (chunk_append_fn)derivation_list_append); size_t s_size = gl_list_size (ret->state_items.contents); ret->state_items.total_size = s_size; if (s_size > 0) { ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents, s_size - 1); ret->state_items.head_elt = gl_list_get_at (ret->state_items.contents, 0); } else { ret->state_items.tail_elt = NULL; ret->state_items.head_elt = NULL; } size_t d_size = gl_list_size (ret->derivs.contents); ret->derivs.total_size = d_size; if (d_size > 0) { ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents, d_size - 1); ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0); } else { ret->derivs.tail_elt = NULL; ret->derivs.head_elt = NULL; } for (int i = 0; i < 4; ++i) gl_list_free (chunks[i]); return popped_derivs; } void parse_state_lists (parse_state *ps, state_item_list *sitems, derivation_list *derivs) { parse_state *temp = empty_parse_state (); size_t si_size = ps->state_items.total_size; size_t deriv_size = ps->derivs.total_size; derivation_list dl = parser_pop (ps, si_size, deriv_size, temp); *sitems = temp->state_items.contents; *derivs = temp->derivs.contents; // prevent the return lists from being freed temp->state_items.contents = NULL; temp->derivs.contents = NULL; free_parse_state (temp); derivation_list_free (dl); } /** * Compute the parse states that result from taking a transition on * nullable symbols whenever possible from the given state_item. */ static void nullable_closure (parse_state *ps, state_item *si, parse_state_list state_list) { parse_state *current_ps = ps; state_item_number prev_sin = si - state_items; for (state_item_number sin = si->trans; sin != -1; prev_sin = sin, sin = state_items[sin].trans) { state_item *psi = &state_items[prev_sin]; symbol_number sp = item_number_as_symbol_number (*psi->item); if (ISTOKEN (sp) || !nullable[sp - ntokens]) break; state_item *nsi = &state_items[sin]; current_ps = copy_parse_state (false, current_ps); ps_si_append (current_ps, nsi); ps_derivs_append (current_ps, derivation_new (sp, derivation_list_new (), state_item_rule (nsi))); parse_state_list_append (state_list, current_ps); } } parse_state_list simulate_transition (parse_state *ps) { const state_item *si = ps->state_items.tail_elt; symbol_number sym = item_number_as_symbol_number (*si->item); // Transition on the same next symbol, taking nullable // symbols into account. parse_state_list result = parse_state_list_new (); state_item_number si_next = si->trans; // Check for disabled transition, shouldn't happen as any // state_items that lead to these should be disabled. if (si_next < 0) return result; parse_state *next_ps = copy_parse_state (false, ps); ps_si_append (next_ps, &state_items[si_next]); ps_derivs_append (next_ps, derivation_new_leaf (sym)); parse_state_list_append (result, next_ps); nullable_closure (next_ps, &state_items[si_next], result); return result; } /** * Determine if the given symbols are equal or their first sets * intersect. */ static bool compatible (symbol_number sym1, symbol_number sym2) { if (sym1 == sym2) return true; if (ISTOKEN (sym1) && ISVAR (sym2)) return bitset_test (FIRSTS (sym2), sym1); else if (ISVAR (sym1) && ISTOKEN (sym2)) return bitset_test (FIRSTS (sym1), sym2); else if (ISVAR (sym1) && ISVAR (sym2)) return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2)); else return false; } parse_state_list simulate_production (parse_state *ps, symbol_number compat_sym) { parse_state_list result = parse_state_list_new (); const state_item *si = parse_state_tail (ps); if (si->prods) { bitset_iterator biter; state_item_number sin; BITSET_FOR_EACH (biter, si->prods, sin, 0) { // Take production step only if lhs is not nullable and // if first rhs symbol is compatible with compat_sym state_item *next = &state_items[sin]; item_number *itm1 = next->item; if (!compatible (*itm1, compat_sym) || !production_allowed (si, next)) continue; parse_state *next_ps = copy_parse_state (false, ps); ps_si_append (next_ps, next); parse_state_list_append (result, next_ps); if (next_ps->depth >= 0) ++next_ps->depth; nullable_closure (next_ps, next, result); } } return result; } // simulates a reduction on the given parse state, conflict_item is the // item associated with ps's conflict. symbol_set is a lookahead set this // reduction must be compatible with parse_state_list simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set) { parse_state_list result = parse_state_list_new (); int s_size = ps->state_items.total_size; int d_size = ps->derivs.total_size; if (ps->depth >= 0) d_size--; // account for dot parse_state *new_root = empty_parse_state (); derivation_list popped_derivs = parser_pop (ps, d_size - rule_len, s_size - rule_len - 1, new_root); // update derivation state_item *si = (state_item *) ps->state_items.tail_elt; const rule *r = item_rule (si->item); symbol_number lhs = r->lhs->number; derivation *deriv = derivation_new (lhs, popped_derivs, state_item_rule (si)); --new_root->depth; ps_derivs_append (new_root, deriv); if (s_size != rule_len + 1) { state_item *tail = (state_item *) new_root->state_items.tail_elt; ps_si_append (new_root, &state_items[tail->trans]); parse_state_list_append (result, new_root); } else { // The head state_item is a production item, so we need to prepend // with possible source state-items. const state_item *head = ps->state_items.head_elt; state_item_list prev = lssi_reverse_production (head, symbol_set); // TODO: better understand what causes this case. if (gl_list_size (prev) == 0) { // new_root needs to have an RC of 1 to be freed correctly here. parse_state_retain (new_root); free_parse_state (new_root); } else { state_item *psis = NULL; for (gl_list_iterator_t it = gl_list_iterator (prev); state_item_list_next (&it, &psis); ) { // Prepend the result from the reverse production. parse_state *copy = copy_parse_state (true, new_root); ps_si_prepend (copy, psis); // Append the left hand side to the end of the parser state copy = copy_parse_state (false, copy); struct si_chunk *sis = ©->state_items; const state_item *tail = sis->tail_elt; ps_si_append (copy, &state_items[tail->trans]); parse_state_list_append (result, copy); nullable_closure (copy, (state_item *) sis->tail_elt, result); } } gl_list_free (prev); } return result; } parse_state_list parser_prepend (parse_state *ps) { parse_state_list res = parse_state_list_new (); const state_item *head = ps->state_items.head_elt; symbol_number prepend_sym = item_number_as_symbol_number (*(head->item - 1)); bitset_iterator biter; state_item_number sin; BITSET_FOR_EACH (biter, head->revs, sin, 0) { parse_state *copy = copy_parse_state (true, ps); ps_si_prepend (copy, &state_items[sin]); if (SI_TRANSITION (head)) ps_derivs_prepend (copy, derivation_new_leaf (prepend_sym)); parse_state_list_append (res, copy); } return res; } void print_parse_state (parse_state *ps) { FILE *out = stderr; fprintf (out, "(size %zu depth %d rc %d)\n", ps->state_items.total_size, ps->depth, ps->reference_count); state_item_print (ps->state_items.head_elt, out, ""); state_item_print (ps->state_items.tail_elt, out, ""); if (ps->derivs.total_size > 0) derivation_print (ps->derivs.head_elt, out, ""); putc ('\n', out); }