diff options
author | Shawn O. Pearce <sop@google.com> | 2010-06-02 10:24:59 -0700 |
---|---|---|
committer | Shawn O. Pearce <sop@google.com> | 2010-06-04 12:45:13 -0700 |
commit | 4c02c6a5ac4b946e041af173c378406d670ae83e (patch) | |
tree | d89f643b1293da897ad0ba758d8cf0e40999ff1d | |
parent | c689603a805e1a9ca6521ccf716be42231540be1 (diff) | |
download | gwtorm-4c02c6a5ac4b946e041af173c378406d670ae83e.tar.gz |
Create an encoder for NoSQL secondary index keys
In a NoSQL system we usually don't have a secondary index available
for columns and instead must fake that ourselves by creating a new
row that is a composite of the various columns. To ensure sorting
occurs properly in the NoSQL key range space, we need to encode the
column values to build the composite key.
Change-Id: Ieefe9757f0005bb55e30c3dc0786769bf2d402fb
Signed-off-by: Shawn O. Pearce <sop@google.com>
-rw-r--r-- | src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java | 166 | ||||
-rw-r--r-- | src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java | 82 |
2 files changed, 248 insertions, 0 deletions
diff --git a/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java b/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java new file mode 100644 index 0000000..a670840 --- /dev/null +++ b/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java @@ -0,0 +1,166 @@ +// Copyright 2010 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gwtorm.nosql; + + +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; + +/** + * Encoder support for {@link IndexFunction} computed strings. + * <p> + * This class provides a string that may contain multiple values, using + * delimiters between fields and big-endian encoded numerics. Sorting the + * resulting strings using unsigned byte orderings produces a stable sorting. + * <p> + * The encoding used by this class relies on having 258 tokens. To get the extra + * 2 tokens within a 256 byte range, escapes are used according to the following + * simple table: + * <ul> + * <li>delimiter = \x00\x01 + * <li>byte \x00 = \x00\xff + * <li>byte \xff = \xff\x00 + * <li>infinity = \xff\xff + * </ul> + * <p> + * Integers are encoded as variable length big-endian values, skipping leading + * zero bytes, prefixed by the number of bytes used to encode them. Therefore 0 + * is encoded as "\x00", and 256 is encoded as "\x02\x01\x00". Negative values + * are encoded in their twos complement encoding and therefore sort after the + * maximum positive value. + * <p> + * Strings and byte arrays supplied by the caller have their \x00 and \xff + * values escaped according to the table above, but are otherwise written as-is + * without a length prefix. + * <p> + * Callers are responsible for inserting {@link #delimiter()} markers at the + * appropriate positions in the sequence. + */ +public class IndexKeyBuilder { + private final ByteArrayOutputStream buf = new ByteArrayOutputStream(); + + /** + * Add a delimiter marker to the string. + */ + public void delimiter() { + buf.write(0x00); + buf.write(0x01); + } + + /** + * Add the special infinity symbol to the string. + * <p> + * The infinity symbol sorts after all other values in the same position. + */ + public void infinity() { + buf.write(0xff); + buf.write(0xff); + } + + /** + * Add a raw sequence of bytes. + * <p> + * The bytes 0x00 and 0xff are escaped by this method according to the + * encoding table described in the class documentation. + * + * @param bin array to copy from. + * @param pos first index to copy. + * @param cnt number of bytes to copy. + */ + public void add(byte[] bin, int pos, int cnt) { + while (0 < cnt--) { + byte b = bin[pos++]; + if (b == 0x00) { + buf.write(0x00); + buf.write(0xff); + + } else if (b == -1) { + buf.write(0xff); + buf.write(0x00); + + } else { + buf.write(b); + } + } + } + + /** + * Add a raw sequence of bytes. + * <p> + * The bytes 0x00 and 0xff are escaped by this method according to the + * encoding table described in the class documentation. + * + * @param bin the complete array to copy. + */ + public void add(byte[] bin) { + add(bin, 0, bin.length); + } + + /** + * Encode a string into UTF-8 and append as a sequence of bytes. + * + * @param str the string to encode and append. + */ + public void add(String str) { + try { + add(str.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("JVM does not support UTF-8", e); + } + } + + /** + * Add a single character as though it were part of a UTF-8 string. + * + * @param ch the character to encode and append. + */ + public void add(char ch) { + if (ch == 0x00) { + buf.write(0x00); + buf.write(0xff); + + } else if (ch <= 255) { + buf.write(ch); + + } else { + add(Character.toString(ch)); + } + } + + /** + * Add an integer value as a big-endian variable length integer. + * + * @param val the value to add. + */ + public void add(long val) { + final byte[] t = new byte[9]; + int i = t.length; + while (val != 0) { + t[--i] = (byte) (val & 0xff); + val >>>= 8; + } + t[i - 1] = (byte) (t.length - i); + buf.write(t, i - 1, t.length - i + 1); + } + + /** + * Obtain a copy of the internal storage array. + * + * @return the current state of this, converted into a flat byte array. + */ + public byte[] toByteArray() { + return buf.toByteArray(); + } +} diff --git a/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java b/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java new file mode 100644 index 0000000..e0f8028 --- /dev/null +++ b/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java @@ -0,0 +1,82 @@ +// Copyright 2010 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gwtorm.nosql; + +import junit.framework.TestCase; + +public class IndexKeyBuilderTest extends TestCase { + public void testInt() { + IndexKeyBuilder ib; + + ib = new IndexKeyBuilder(); + ib.add(0); + assertEquals(new byte[] {0x00}, ib); + + ib = new IndexKeyBuilder(); + ib.add(1); + assertEquals(new byte[] {0x01, 0x01}, ib); + + ib = new IndexKeyBuilder(); + ib.add(256); + assertEquals(new byte[] {0x02, 0x01, 0x00}, ib); + } + + public void testDelimiter() { + IndexKeyBuilder ib = new IndexKeyBuilder(); + ib.delimiter(); + assertEquals(new byte[] {0x00, 0x01}, ib); + } + + public void testStringASCII() { + IndexKeyBuilder ib = new IndexKeyBuilder(); + ib.add("hi"); + assertEquals(new byte[] {'h', 'i'}, ib); + } + + public void testStringNUL() { + IndexKeyBuilder ib = new IndexKeyBuilder(); + ib.add("\0"); + assertEquals(new byte[] {0x00, (byte) 0xff}, ib); + } + + public void testStringFF() { + IndexKeyBuilder ib = new IndexKeyBuilder(); + ib.add(new byte[] {(byte) 0xff}); + assertEquals(new byte[] {(byte) 0xff, 0x00}, ib); + } + + public void testInfinity() { + IndexKeyBuilder ib = new IndexKeyBuilder(); + ib.infinity(); + assertEquals(new byte[] {(byte) 0xff, (byte) 0xff}, ib); + } + + private static void assertEquals(byte[] exp, IndexKeyBuilder ic) { + assertEquals(toString(exp), toString(ic.toByteArray())); + } + + private static String toString(byte[] bin) { + StringBuilder dst = new StringBuilder(bin.length * 2); + for (byte b : bin) { + dst.append(hexchar[(b >>> 4) & 0x0f]); + dst.append(hexchar[b & 0x0f]); + } + return dst.toString(); + } + + private static final char[] hexchar = + {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', // + 'a', 'b', 'c', 'd', 'e', 'f'}; +} |