summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShawn O. Pearce <sop@google.com>2010-06-02 10:24:59 -0700
committerShawn O. Pearce <sop@google.com>2010-06-04 12:45:13 -0700
commit4c02c6a5ac4b946e041af173c378406d670ae83e (patch)
treed89f643b1293da897ad0ba758d8cf0e40999ff1d
parentc689603a805e1a9ca6521ccf716be42231540be1 (diff)
downloadgwtorm-4c02c6a5ac4b946e041af173c378406d670ae83e.tar.gz
Create an encoder for NoSQL secondary index keys
In a NoSQL system we usually don't have a secondary index available for columns and instead must fake that ourselves by creating a new row that is a composite of the various columns. To ensure sorting occurs properly in the NoSQL key range space, we need to encode the column values to build the composite key. Change-Id: Ieefe9757f0005bb55e30c3dc0786769bf2d402fb Signed-off-by: Shawn O. Pearce <sop@google.com>
-rw-r--r--src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java166
-rw-r--r--src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java82
2 files changed, 248 insertions, 0 deletions
diff --git a/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java b/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java
new file mode 100644
index 0000000..a670840
--- /dev/null
+++ b/src/main/java/com/google/gwtorm/nosql/IndexKeyBuilder.java
@@ -0,0 +1,166 @@
+// Copyright 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtorm.nosql;
+
+
+import java.io.ByteArrayOutputStream;
+import java.io.UnsupportedEncodingException;
+
+/**
+ * Encoder support for {@link IndexFunction} computed strings.
+ * <p>
+ * This class provides a string that may contain multiple values, using
+ * delimiters between fields and big-endian encoded numerics. Sorting the
+ * resulting strings using unsigned byte orderings produces a stable sorting.
+ * <p>
+ * The encoding used by this class relies on having 258 tokens. To get the extra
+ * 2 tokens within a 256 byte range, escapes are used according to the following
+ * simple table:
+ * <ul>
+ * <li>delimiter = \x00\x01
+ * <li>byte \x00 = \x00\xff
+ * <li>byte \xff = \xff\x00
+ * <li>infinity = \xff\xff
+ * </ul>
+ * <p>
+ * Integers are encoded as variable length big-endian values, skipping leading
+ * zero bytes, prefixed by the number of bytes used to encode them. Therefore 0
+ * is encoded as "\x00", and 256 is encoded as "\x02\x01\x00". Negative values
+ * are encoded in their twos complement encoding and therefore sort after the
+ * maximum positive value.
+ * <p>
+ * Strings and byte arrays supplied by the caller have their \x00 and \xff
+ * values escaped according to the table above, but are otherwise written as-is
+ * without a length prefix.
+ * <p>
+ * Callers are responsible for inserting {@link #delimiter()} markers at the
+ * appropriate positions in the sequence.
+ */
+public class IndexKeyBuilder {
+ private final ByteArrayOutputStream buf = new ByteArrayOutputStream();
+
+ /**
+ * Add a delimiter marker to the string.
+ */
+ public void delimiter() {
+ buf.write(0x00);
+ buf.write(0x01);
+ }
+
+ /**
+ * Add the special infinity symbol to the string.
+ * <p>
+ * The infinity symbol sorts after all other values in the same position.
+ */
+ public void infinity() {
+ buf.write(0xff);
+ buf.write(0xff);
+ }
+
+ /**
+ * Add a raw sequence of bytes.
+ * <p>
+ * The bytes 0x00 and 0xff are escaped by this method according to the
+ * encoding table described in the class documentation.
+ *
+ * @param bin array to copy from.
+ * @param pos first index to copy.
+ * @param cnt number of bytes to copy.
+ */
+ public void add(byte[] bin, int pos, int cnt) {
+ while (0 < cnt--) {
+ byte b = bin[pos++];
+ if (b == 0x00) {
+ buf.write(0x00);
+ buf.write(0xff);
+
+ } else if (b == -1) {
+ buf.write(0xff);
+ buf.write(0x00);
+
+ } else {
+ buf.write(b);
+ }
+ }
+ }
+
+ /**
+ * Add a raw sequence of bytes.
+ * <p>
+ * The bytes 0x00 and 0xff are escaped by this method according to the
+ * encoding table described in the class documentation.
+ *
+ * @param bin the complete array to copy.
+ */
+ public void add(byte[] bin) {
+ add(bin, 0, bin.length);
+ }
+
+ /**
+ * Encode a string into UTF-8 and append as a sequence of bytes.
+ *
+ * @param str the string to encode and append.
+ */
+ public void add(String str) {
+ try {
+ add(str.getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("JVM does not support UTF-8", e);
+ }
+ }
+
+ /**
+ * Add a single character as though it were part of a UTF-8 string.
+ *
+ * @param ch the character to encode and append.
+ */
+ public void add(char ch) {
+ if (ch == 0x00) {
+ buf.write(0x00);
+ buf.write(0xff);
+
+ } else if (ch <= 255) {
+ buf.write(ch);
+
+ } else {
+ add(Character.toString(ch));
+ }
+ }
+
+ /**
+ * Add an integer value as a big-endian variable length integer.
+ *
+ * @param val the value to add.
+ */
+ public void add(long val) {
+ final byte[] t = new byte[9];
+ int i = t.length;
+ while (val != 0) {
+ t[--i] = (byte) (val & 0xff);
+ val >>>= 8;
+ }
+ t[i - 1] = (byte) (t.length - i);
+ buf.write(t, i - 1, t.length - i + 1);
+ }
+
+ /**
+ * Obtain a copy of the internal storage array.
+ *
+ * @return the current state of this, converted into a flat byte array.
+ */
+ public byte[] toByteArray() {
+ return buf.toByteArray();
+ }
+}
diff --git a/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java b/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java
new file mode 100644
index 0000000..e0f8028
--- /dev/null
+++ b/src/test/java/com/google/gwtorm/nosql/IndexKeyBuilderTest.java
@@ -0,0 +1,82 @@
+// Copyright 2010 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtorm.nosql;
+
+import junit.framework.TestCase;
+
+public class IndexKeyBuilderTest extends TestCase {
+ public void testInt() {
+ IndexKeyBuilder ib;
+
+ ib = new IndexKeyBuilder();
+ ib.add(0);
+ assertEquals(new byte[] {0x00}, ib);
+
+ ib = new IndexKeyBuilder();
+ ib.add(1);
+ assertEquals(new byte[] {0x01, 0x01}, ib);
+
+ ib = new IndexKeyBuilder();
+ ib.add(256);
+ assertEquals(new byte[] {0x02, 0x01, 0x00}, ib);
+ }
+
+ public void testDelimiter() {
+ IndexKeyBuilder ib = new IndexKeyBuilder();
+ ib.delimiter();
+ assertEquals(new byte[] {0x00, 0x01}, ib);
+ }
+
+ public void testStringASCII() {
+ IndexKeyBuilder ib = new IndexKeyBuilder();
+ ib.add("hi");
+ assertEquals(new byte[] {'h', 'i'}, ib);
+ }
+
+ public void testStringNUL() {
+ IndexKeyBuilder ib = new IndexKeyBuilder();
+ ib.add("\0");
+ assertEquals(new byte[] {0x00, (byte) 0xff}, ib);
+ }
+
+ public void testStringFF() {
+ IndexKeyBuilder ib = new IndexKeyBuilder();
+ ib.add(new byte[] {(byte) 0xff});
+ assertEquals(new byte[] {(byte) 0xff, 0x00}, ib);
+ }
+
+ public void testInfinity() {
+ IndexKeyBuilder ib = new IndexKeyBuilder();
+ ib.infinity();
+ assertEquals(new byte[] {(byte) 0xff, (byte) 0xff}, ib);
+ }
+
+ private static void assertEquals(byte[] exp, IndexKeyBuilder ic) {
+ assertEquals(toString(exp), toString(ic.toByteArray()));
+ }
+
+ private static String toString(byte[] bin) {
+ StringBuilder dst = new StringBuilder(bin.length * 2);
+ for (byte b : bin) {
+ dst.append(hexchar[(b >>> 4) & 0x0f]);
+ dst.append(hexchar[b & 0x0f]);
+ }
+ return dst.toString();
+ }
+
+ private static final char[] hexchar =
+ {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
+ 'a', 'b', 'c', 'd', 'e', 'f'};
+}