1 files changed, 30 insertions, 12 deletions
diff --git a/src/compile.rs b/src/compile.rs
index 9ffd347..9a2ed5e 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -4,16 +4,16 @@ use std::iter;
 use std::result;
 use std::sync::Arc;
 
-use syntax::hir::{self, Hir};
-use syntax::is_word_byte;
-use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
+use regex_syntax::hir::{self, Hir};
+use regex_syntax::is_word_byte;
+use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
 
-use prog::{
+use crate::prog::{
     EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
     InstSave, InstSplit, Program,
 };
 
-use Error;
+use crate::Error;
 
 type Result = result::Result<Patch, Error>;
 type ResultOrEmpty = result::Result<Option<Patch>, Error>;
@@ -38,6 +38,7 @@ pub struct Compiler {
     suffix_cache: SuffixCache,
     utf8_seqs: Option<Utf8Sequences>,
     byte_classes: ByteClassSet,
+    extra_inst_bytes: usize,
 }
 
 impl Compiler {
@@ -54,6 +55,7 @@ impl Compiler {
             suffix_cache: SuffixCache::new(1000),
             utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
             byte_classes: ByteClassSet::new(),
+            extra_inst_bytes: 0,
         }
     }
 
@@ -253,8 +255,8 @@ impl Compiler {
     /// Ok(None) is returned when an expression is compiled to no
     /// instruction, and so no patch.entry value makes sense.
     fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
-        use prog;
-        use syntax::hir::HirKind::*;
+        use crate::prog;
+        use regex_syntax::hir::HirKind::*;
 
         self.check_size()?;
         match *expr.kind() {
@@ -316,6 +318,13 @@ impl Compiler {
                 }
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
+                // We also make sure that all ASCII bytes are in a different
+                // class from non-ASCII bytes. Otherwise, it's possible for
+                // ASCII bytes to get lumped into the same class as non-ASCII
+                // bytes. This in turn may cause the lazy DFA to falsely start
+                // when it sees an ASCII byte that maps to a byte class with
+                // non-ASCII bytes. This ensures that never happens.
+                self.byte_classes.set_range(0, 0x7F);
                 self.c_empty_look(prog::EmptyLook::WordBoundary)
             }
             WordBoundary(hir::WordBoundary::UnicodeNegate) => {
@@ -328,6 +337,8 @@ impl Compiler {
                 }
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
+                // See comments above for why we set the ASCII range here.
+                self.byte_classes.set_range(0, 0x7F);
                 self.c_empty_look(prog::EmptyLook::NotWordBoundary)
             }
             WordBoundary(hir::WordBoundary::Ascii) => {
@@ -420,6 +431,8 @@ impl Compiler {
     }
 
     fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
+        use std::mem::size_of;
+
         assert!(!ranges.is_empty());
         if self.compiled.uses_bytes() {
             Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
@@ -429,6 +442,8 @@ impl Compiler {
             let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
                 self.push_hole(InstHole::Char { c: ranges[0].0 })
             } else {
+                self.extra_inst_bytes +=
+                    ranges.len() * (size_of::<char>() * 2);
                 self.push_hole(InstHole::Ranges { ranges: ranges })
             };
             Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
@@ -548,7 +563,7 @@ impl Compiler {
     }
 
     fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
-        use syntax::hir::RepetitionKind::*;
+        use regex_syntax::hir::RepetitionKind::*;
         match rep.kind {
             ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
             ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
@@ -795,7 +810,9 @@ impl Compiler {
     fn check_size(&self) -> result::Result<(), Error> {
         use std::mem::size_of;
 
-        if self.insts.len() * size_of::<Inst>() > self.size_limit {
+        let size =
+            self.extra_inst_bytes + (self.insts.len() * size_of::<Inst>());
+        if size > self.size_limit {
             Err(Error::CompiledTooBig(self.size_limit))
         } else {
             Ok(())
@@ -927,9 +944,10 @@ impl InstHole {
                 Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
             }
             InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
-            InstHole::Ranges { ref ranges } => {
-                Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
-            }
+            InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges {
+                goto: goto,
+                ranges: ranges.clone().into_boxed_slice(),
+            }),
             InstHole::Bytes { start, end } => {
                 Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
             }