1 files changed, 408 insertions, 0 deletions
diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
new file mode 100644
index 00000000..527ab336
--- /dev/null
+++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
@@ -0,0 +1,408 @@
+#region Copyright notice and license
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Google.Protobuf
+{
+    public class JsonTokenizerTest
+    {
+        [Test]
+        public void EmptyObjectValue()
+        {
+            AssertTokens("{}", JsonToken.StartObject, JsonToken.EndObject);
+        }
+
+        [Test]
+        public void EmptyArrayValue()
+        {
+            AssertTokens("[]", JsonToken.StartArray, JsonToken.EndArray);
+        }
+
+        [Test]
+        [TestCase("foo", "foo")]
+        [TestCase("tab\\t", "tab\t")]
+        [TestCase("line\\nfeed", "line\nfeed")]
+        [TestCase("carriage\\rreturn", "carriage\rreturn")]
+        [TestCase("back\\bspace", "back\bspace")]
+        [TestCase("form\\ffeed", "form\ffeed")]
+        [TestCase("escaped\\/slash", "escaped/slash")]
+        [TestCase("escaped\\\\backslash", "escaped\\backslash")]
+        [TestCase("escaped\\\"quote", "escaped\"quote")]
+        [TestCase("foo {}[] bar", "foo {}[] bar")]
+        [TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
+        [TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
+        [TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
+        public void StringValue(string json, string expectedValue)
+        {
+            AssertTokensNoReplacement("\"" + json + "\"", JsonToken.Value(expectedValue));
+        }
+
+        // Valid surrogate pairs, with mixed escaping. These test cases can't be expressed
+        // using TestCase as they have no valid UTF-8 representation.
+        // It's unclear exactly how we should handle a mixture of escaped or not: that can't
+        // come from UTF-8 text, but could come from a .NET string. For the moment,
+        // treat it as valid in the obvious way.
+        [Test]
+        public void MixedSurrogatePairs()
+        {
+            string expected = "\ud800\udc00";
+            AssertTokens("'\\ud800\udc00'", JsonToken.Value(expected));
+            AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
+        }
+
+        [Test]
+        public void ObjectDepth()
+        {
+            string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
+            // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
+            Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
+        [Test]
+        public void ObjectDepth_WithPushBack()
+        {
+            string json = "{}";
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            var token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            // When we push back a "start object", we should effectively be back to the previous depth.
+            tokenizer.PushBack(token);
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            // Read the same token again, and get back to depth 1
+            token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+
+            // Now the same in reverse, with EndObject
+            token = tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            tokenizer.PushBack(token);
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
+        [Test]
+        [TestCase("embedded tab\t")]
+        [TestCase("embedded CR\r")]
+        [TestCase("embedded LF\n")]
+        [TestCase("embedded bell\u0007")]
+        [TestCase("bad escape\\a")]
+        [TestCase("incomplete escape\\")]
+        [TestCase("incomplete Unicode escape\\u000")]
+        [TestCase("invalid Unicode escape\\u000H")]
+        // Surrogate pair handling, both in raw .NET strings and escaped. We only need
+        // to detect this in strings, as non-ASCII characters anywhere other than in strings
+        // will already lead to parsing errors.
+        [TestCase("\\ud800")]
+        [TestCase("\\udc00")]
+        [TestCase("\\ud800x")]
+        [TestCase("\\udc00x")]
+        [TestCase("\\udc00\\ud800y")]
+        public void InvalidStringValue(string json)
+        {
+            AssertThrowsAfter("\"" + json + "\"");
+        }
+
+        // Tests for invalid strings that can't be expressed in attributes,
+        // as the constants can't be expressed as UTF-8 strings.
+        [Test]
+        public void InvalidSurrogatePairs()
+        {
+            AssertThrowsAfter("\"\ud800x\"");
+            AssertThrowsAfter("\"\udc00y\"");
+            AssertThrowsAfter("\"\udc00\ud800y\"");
+        }
+
+        [Test]
+        [TestCase("0", 0)]
+        [TestCase("-0", 0)] // We don't distinguish between positive and negative 0
+        [TestCase("1", 1)]
+        [TestCase("-1", -1)]
+        // From here on, assume leading sign is okay...
+        [TestCase("1.125", 1.125)]
+        [TestCase("1.0", 1)]
+        [TestCase("1e5", 100000)]
+        [TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
+        [TestCase("1E5", 100000)]
+        [TestCase("1e+5", 100000)]
+        [TestCase("1E-5", 0.00001)]
+        [TestCase("123E-2", 1.23)]
+        [TestCase("123.45E3", 123450)]
+        [TestCase("   1   ", 1)]
+        public void NumberValue(string json, double expectedValue)
+        {
+            AssertTokens(json, JsonToken.Value(expectedValue));
+        }
+
+        [Test]
+        [TestCase("00")]
+        [TestCase(".5")]
+        [TestCase("1.")]
+        [TestCase("1e")]
+        [TestCase("1e-")]
+        [TestCase("--")]
+        [TestCase("--1")]
+        [TestCase("-1.7977e308")]
+        [TestCase("1.7977e308")]
+        public void InvalidNumberValue(string json)
+        {
+            AssertThrowsAfter(json);
+        }
+
+        [Test]
+        [TestCase("nul")]
+        [TestCase("nothing")]
+        [TestCase("truth")]
+        [TestCase("fALSEhood")]
+        public void InvalidLiterals(string json)
+        {
+            AssertThrowsAfter(json);
+        }
+
+        [Test]
+        public void NullValue()
+        {
+            AssertTokens("null", JsonToken.Null);
+        }
+
+        [Test]
+        public void TrueValue()
+        {
+            AssertTokens("true", JsonToken.True);
+        }
+
+        [Test]
+        public void FalseValue()
+        {
+            AssertTokens("false", JsonToken.False);
+        }
+
+        [Test]
+        public void SimpleObject()
+        {
+            AssertTokens("{'x': 'y'}",
+                JsonToken.StartObject, JsonToken.Name("x"), JsonToken.Value("y"), JsonToken.EndObject);
+        }
+        
+        [Test]
+        [TestCase("[10, 20", 3)]
+        [TestCase("[10,", 2)]
+        [TestCase("[10:20]", 2)]
+        [TestCase("[", 1)]
+        [TestCase("[,", 1)]
+        [TestCase("{", 1)]
+        [TestCase("{,", 1)]
+        [TestCase("{[", 1)]
+        [TestCase("{{", 1)]
+        [TestCase("{0", 1)]
+        [TestCase("{null", 1)]
+        [TestCase("{false", 1)]
+        [TestCase("{true", 1)]
+        [TestCase("}", 0)]
+        [TestCase("]", 0)]
+        [TestCase(",", 0)]
+        [TestCase("'foo' 'bar'", 1)]
+        [TestCase(":", 0)]
+        [TestCase("'foo", 0)] // Incomplete string
+        [TestCase("{ 'foo' }", 2)]
+        [TestCase("{ x:1", 1)] // Property names must be quoted
+        [TestCase("{]", 1)]
+        [TestCase("[}", 1)]
+        [TestCase("[1,", 2)]
+        [TestCase("{'x':0]", 3)]
+        [TestCase("{ 'foo': }", 2)]
+        [TestCase("{ 'foo':'bar', }", 3)]
+        public void InvalidStructure(string json, int expectedValidTokens)
+        {
+            // Note: we don't test that the earlier tokens are exactly as expected,
+            // partly because that's hard to parameterize.
+            var reader = new StringReader(json.Replace('\'', '"'));
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
+            for (int i = 0; i < expectedValidTokens; i++)
+            {
+                Assert.IsNotNull(tokenizer.Next());
+            }
+            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
+        }
+
+        [Test]
+        public void ArrayMixedType()
+        {
+            AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]",
+                JsonToken.StartArray,
+                JsonToken.Value(1),
+                JsonToken.Value("foo"),
+                JsonToken.Null,
+                JsonToken.False,
+                JsonToken.True,
+                JsonToken.StartArray,
+                JsonToken.Value(2),
+                JsonToken.EndArray,
+                JsonToken.StartObject,
+                JsonToken.Name("x"),
+                JsonToken.Value("y"),
+                JsonToken.EndObject,
+                JsonToken.EndArray);
+        }
+
+        [Test]
+        public void ObjectMixedType()
+        {
+            AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true, 
+                           'f': [2], 'g': {'x':'y' }}",
+                JsonToken.StartObject,
+                JsonToken.Name("a"),
+                JsonToken.Value(1),
+                JsonToken.Name("b"),
+                JsonToken.Value("bar"),
+                JsonToken.Name("c"),
+                JsonToken.Null,
+                JsonToken.Name("d"),
+                JsonToken.False,
+                JsonToken.Name("e"),
+                JsonToken.True,
+                JsonToken.Name("f"),
+                JsonToken.StartArray,
+                JsonToken.Value(2),
+                JsonToken.EndArray,
+                JsonToken.Name("g"),
+                JsonToken.StartObject,
+                JsonToken.Name("x"),
+                JsonToken.Value("y"),
+                JsonToken.EndObject,
+                JsonToken.EndObject);
+        }
+
+        [Test]
+        public void NextAfterEndDocumentThrows()
+        {
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
+            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
+        }
+
+        [Test]
+        public void CanPushBackEndDocument()
+        {
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
+            Assert.AreEqual(JsonToken.Null, tokenizer.Next());
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            tokenizer.PushBack(JsonToken.EndDocument);
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
+        }
+       
+        /// <summary>
+        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
+        /// All apostrophes are first converted to double quotes, allowing any tests
+        /// that don't need to check actual apostrophe handling to use apostrophes in the JSON, avoiding
+        /// messy string literal escaping. The "end document" token is not specified in the list of 
+        /// expected tokens, but is implicit.
+        /// </summary>
+        private static void AssertTokens(string json, params JsonToken[] expectedTokens)
+        {
+            AssertTokensNoReplacement(json.Replace('\'', '"'), expectedTokens);
+        }
+
+        /// <summary>
+        /// Asserts that the specified JSON is tokenized into the given sequence of tokens.
+        /// Unlike <see cref="AssertTokens(string, JsonToken[])"/>, this does not perform any character
+        /// replacement on the specified JSON, and should be used when the text contains apostrophes which
+        /// are expected to be used *as* apostrophes. The "end document" token is not specified in the list of 
+        /// expected tokens, but is implicit.
+        /// </summary>
+        private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
+        {
+            var reader = new StringReader(json);
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
+            for (int i = 0; i < expectedTokens.Length; i++)
+            {
+                var actualToken = tokenizer.Next();
+                if (actualToken == JsonToken.EndDocument)
+                {
+                    Assert.Fail("Expected {0} but reached end of token stream", expectedTokens[i]);
+                }
+                Assert.AreEqual(expectedTokens[i], actualToken);
+            }
+            var finalToken = tokenizer.Next();
+            if (finalToken != JsonToken.EndDocument)
+            {
+                Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
+            }
+        }
+
+        private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
+        {
+            var reader = new StringReader(json);
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
+            for (int i = 0; i < expectedTokens.Length; i++)
+            {
+                var actualToken = tokenizer.Next();
+                if (actualToken == JsonToken.EndDocument)
+                {
+                    Assert.Fail("Expected {0} but reached end of document", expectedTokens[i]);
+                }
+                Assert.AreEqual(expectedTokens[i], actualToken);
+            }
+            Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
+        }
+    }
+}