6 files changed, 51 insertions, 151 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5cdcdba
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+target
+Cargo.lock
+scripts/tmp
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..296ac17
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,24 @@
+language: rust
+sudo: false
+script:
+  - cargo build --verbose --features no_std
+  - cargo test --verbose --features no_std
+  - cargo clean
+  - cargo build --verbose --features default
+  - cargo test --verbose --features default
+  - cargo bench --verbose --features default
+  - rustdoc --test README.md -L target/debug -L target/debug/deps
+  - cargo doc
+after_success: |
+  [ $TRAVIS_BRANCH = master ] &&
+  [ $TRAVIS_PULL_REQUEST = false ] &&
+  echo '<meta http-equiv=refresh content=0;url=unicode_xid/index.html>' > target/doc/index.html &&
+  pip install ghp-import --user $USER &&
+  $HOME/.local/bin/ghp-import -n target/doc &&
+  git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
+env:
+  global:
+    secure: gTlge+/OQlVkV0R+RThWXeN0aknmS7iUTPBMYKJyRdLz7T2vubw3w80a2CVE87JlpV87A5cVGD+LgR+AhYrhKtvqHb1brMDd99gylBBi2DfV7YapDSwSCuFgVR+FjZfJRcXBtI8po5urUZ84V0WLzRX8SyWqWgoD3oCkSL3Wp3w=
+notifications:
+  email:
+    on_success: never
diff --git a/Cargo.toml b/Cargo.toml
index ae01d15..40a4787 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,10 @@
 [package]
 
 name = "unicode-xid"
-version = "0.1.0"
-authors = ["erick.tryzelaar <erick.tryzelaar@gmail.com>"]
+version = "0.0.1"
+authors = ["erick.tryzelaar <erick.tryzelaar@gmail.com>",
+           "kwantam <kwantam@gmail.com>",
+           ]
 
 homepage = "https://github.com/unicode-rs/unicode-xid"
 repository = "https://github.com/unicode-rs/unicode-xid"
@@ -11,9 +13,9 @@ license = "MIT/Apache-2.0"
 keywords = ["text", "unicode", "xid"]
 readme = "README.md"
 description = """
-Determine 
-Determine displayed width of `char` and `str` types
-according to Unicode Standard Annex #11 rules.
+Determine whether characters have the XID_Start
+or XID_Continue properties according to
+Unicode Standard Annex #31.
 """
 
 exclude = [ "target/*", "Cargo.lock" ]
diff --git a/README.md b/README.md
index 5c3acbe..66a57da 100644
--- a/README.md
+++ b/README.md
@@ -1,39 +1,34 @@
-# unicode-derived-property
+# unicode-xid
 
-Determine displayed width of `char` and `str` types according to
-[Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
-rules.
+Determine whether a `char` is a valid identifier character for a parser and/or lexer according to
+[Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules.
 
-[![Build Status](https://travis-ci.org/unicode-rs/unicode-derived-property.svg)](https://travis-ci.org/unicode-rs/unicode-derived-property)
+[![Build Status](https://travis-ci.org/unicode-rs/unicode-xid.svg)](https://travis-ci.org/unicode-rs/unicode-xid)
 
-[Documentation](https://unicode-rs.github.io/unicode-width/unicode_width/index.html)
+[Documentation](https://unicode-rs.github.io/unicode-xid/unicode_xid/index.html)
 
 ```rust
-extern crate unicode_width;
+extern crate unicode_xid;
 
-use unicode_width::UnicodeWidthStr;
+use unicode_xid::UnicodeXID;
 
 fn main() {
-    let teststr = "Ｈｅｌｌｏ, ｗｏｒｌｄ!";
-    let width = UnicodeWidthStr::width(teststr);
-    println!("{}", teststr);
-    println!("The above string is {} columns wide.", width);
-    let width = teststr.width_cjk();
-    println!("The above string is {} columns wide (CJK).", width);
+    let ch = 'a';
+    println!("Is {} a valid start of an identifier? {}", ch, UnicodeXID::is_xid_start(ch));
 }
 ```
 
-## features
+# features
 
-unicode-width supports a `no_std` feature. This eliminates dependence
+unicode-xid supports a `no_std` feature. This eliminates dependence
 on std, and instead uses equivalent functions from core.
 
-## crates.io
+# crates.io
 
 You can use this package in your project by adding the following
 to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-unicode-derived-property = "0.1.1"
+unicode-xid = "0.0.1"
 ```
diff --git a/scripts/unicode.py b/scripts/unicode.py
index 6098c33..a9d58d8 100755
--- a/scripts/unicode.py
+++ b/scripts/unicode.py
@@ -13,12 +13,11 @@
 # This script uses the following Unicode tables:
 # - DerivedCoreProperties.txt
 # - ReadMe.txt
-# - UnicodeData.txt
 #
 # Since this should not require frequent updates, we just store this
 # out-of-line and check the unicode.rs file into git.
 
-import fileinput, re, os, sys, operator
+import fileinput, re, os, sys
 
 preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
@@ -35,23 +34,6 @@ preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRI
 #![allow(missing_docs, non_upper_case_globals, non_snake_case)]
 '''
 
-# Mapping taken from Table 12 from:
-# http://www.unicode.org/reports/tr44/#General_Category_Values
-expanded_categories = {
-    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
-    'Lm': ['L'], 'Lo': ['L'],
-    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
-    'Nd': ['N'], 'Nl': ['N'], 'No': ['No'],
-    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
-    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
-    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
-    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
-    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
-}
-
-# these are the surrogate codepoints, which are not valid rust characters
-surrogate_codepoints = (0xd800, 0xdfff)
-
 def fetch(f):
     if not os.path.exists(os.path.basename(f)):
         os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
@@ -61,92 +43,6 @@ def fetch(f):
         sys.stderr.write("cannot load %s" % f)
         exit(1)
 
-def is_surrogate(n):
-    return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
-
-def load_unicode_data(f):
-    fetch(f)
-    gencats = {}
-    upperlower = {}
-    lowerupper = {}
-    combines = {}
-    canon_decomp = {}
-    compat_decomp = {}
-
-    udict = {};
-    range_start = -1;
-    for line in fileinput.input(f):
-        data = line.split(';');
-        if len(data) != 15:
-            continue
-        cp = int(data[0], 16);
-        if is_surrogate(cp):
-            continue
-        if range_start >= 0:
-            for i in xrange(range_start, cp):
-                udict[i] = data;
-            range_start = -1;
-        if data[1].endswith(", First>"):
-            range_start = cp;
-            continue;
-        udict[cp] = data;
-
-    for code in udict:
-        [code_org, name, gencat, combine, bidi,
-         decomp, deci, digit, num, mirror,
-         old, iso, upcase, lowcase, titlecase ] = udict[code];
-
-        # generate char to char direct common and simple conversions
-        # uppercase to lowercase
-        if gencat == "Lu" and lowcase != "" and code_org != lowcase:
-            upperlower[code] = int(lowcase, 16)
-
-        # lowercase to uppercase
-        if gencat == "Ll" and upcase != "" and code_org != upcase:
-            lowerupper[code] = int(upcase, 16)
-
-        # store decomposition, if given
-        if decomp != "":
-            if decomp.startswith('<'):
-                seq = []
-                for i in decomp.split()[1:]:
-                    seq.append(int(i, 16))
-                compat_decomp[code] = seq
-            else:
-                seq = []
-                for i in decomp.split():
-                    seq.append(int(i, 16))
-                canon_decomp[code] = seq
-
-        # place letter in categories as appropriate
-        for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
-            if cat not in gencats:
-                gencats[cat] = []
-            gencats[cat].append(code)
-
-        # record combining class, if any
-        if combine != "0":
-            if combine not in combines:
-                combines[combine] = []
-            combines[combine].append(code)
-
-    # generate Not_Assigned from Assigned
-    gencats["Cn"] = gen_unassigned(gencats["Assigned"])
-    # Assigned is not a real category
-    del(gencats["Assigned"])
-    # Other contains Not_Assigned
-    gencats["C"].extend(gencats["Cn"])
-    gencats = group_cats(gencats)
-    combines = to_combines(group_cats(combines))
-
-    return (canon_decomp, compat_decomp, gencats, combines, lowerupper, upperlower)
-
-def group_cats(cats):
-    cats_out = {}
-    for cat in cats:
-        cats_out[cat] = group_cat(cats[cat])
-    return cats_out
-
 def group_cat(cat):
     cat_out = []
     letters = sorted(set(cat))
@@ -171,19 +67,6 @@ def ungroup_cat(cat):
             lo += 1
     return cat_out
 
-def gen_unassigned(assigned):
-    assigned = set(assigned)
-    return ([i for i in range(0, 0xd800) if i not in assigned] +
-            [i for i in range(0xe000, 0x110000) if i not in assigned])
-
-def to_combines(combs):
-    combs_out = []
-    for comb in combs:
-        for (lo, hi) in combs[comb]:
-            combs_out.append((lo, hi, comb))
-    combs_out.sort(key=lambda comb: comb[0])
-    return combs_out
-
 def format_table_content(f, content, indent):
     line = " "*indent
     first = True
@@ -304,15 +187,8 @@ if __name__ == "__main__":
 /// that this version of unicode-derived-property is based on.
 pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
 """ % unicode_version)
-        (canon_decomp, compat_decomp, gencats, combines,
-                lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
-        want_derived = ["XID_Start", "XID_Continue"]
-        derived = load_properties("DerivedCoreProperties.txt", want_derived)
-        props = load_properties("PropList.txt",
-                ["White_Space", "Join_Control", "Noncharacter_Code_Point"])
-
-        # bsearch_range_table is used in all the property modules below
         emit_bsearch_range_table(rf)
 
-        # category tables
+        want_derived = ["XID_Start", "XID_Continue"]
+        derived = load_properties("DerivedCoreProperties.txt", want_derived)
         emit_property_module(rf, "derived_property", derived, want_derived)
diff --git a/src/lib.rs b/src/lib.rs
index 46f3466..c952bd1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,7 +34,7 @@
 //!
 //! ```toml
 //! [dependencies]
-//! unicode-derived_property = "0.1.1"
+//! unicode-xid = "0.0.1"
 //! ```
 
 #![deny(missing_docs, unsafe_code)]