summaryrefslogtreecommitdiff
path: root/runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt')
-rw-r--r--runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt334
1 files changed, 334 insertions, 0 deletions
diff --git a/runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt b/runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt
new file mode 100644
index 00000000..7ba12d5c
--- /dev/null
+++ b/runtime/commonMain/src/kotlinx/serialization/json/internal/JsonReader.kt
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2018 JetBrains s.r.o.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kotlinx.serialization.json.internal
+
+import kotlinx.serialization.SharedImmutable
+import kotlinx.serialization.json.*
+import kotlinx.serialization.json.internal.EscapeCharMappings.ESCAPE_2_CHAR
+import kotlin.jvm.*
+
+// special strings
+internal const val NULL = "null"
+
+// special chars
+internal const val COMMA = ','
+internal const val COLON = ':'
+internal const val BEGIN_OBJ = '{'
+internal const val END_OBJ = '}'
+internal const val BEGIN_LIST = '['
+internal const val END_LIST = ']'
+internal const val STRING = '"'
+internal const val STRING_ESC = '\\'
+
+internal const val INVALID = 0.toChar()
+internal const val UNICODE_ESC = 'u'
+
+// token classes
+internal const val TC_OTHER: Byte = 0
+internal const val TC_STRING: Byte = 1
+internal const val TC_STRING_ESC: Byte = 2
+internal const val TC_WS: Byte = 3
+internal const val TC_COMMA: Byte = 4
+internal const val TC_COLON: Byte = 5
+internal const val TC_BEGIN_OBJ: Byte = 6
+internal const val TC_END_OBJ: Byte = 7
+internal const val TC_BEGIN_LIST: Byte = 8
+internal const val TC_END_LIST: Byte = 9
+internal const val TC_NULL: Byte = 10
+internal const val TC_INVALID: Byte = 11
+internal const val TC_EOF: Byte = 12
+
+// mapping from chars to token classes
+private const val CTC_MAX = 0x7e
+
+// mapping from escape chars real chars
+private const val ESC2C_MAX = 0x75
+
+@SharedImmutable
+internal val C2TC = ByteArray(CTC_MAX).apply {
+ for (i in 0..0x20) {
+ initC2TC(i, TC_INVALID)
+ }
+
+ initC2TC(0x09, TC_WS)
+ initC2TC(0x0a, TC_WS)
+ initC2TC(0x0d, TC_WS)
+ initC2TC(0x20, TC_WS)
+ initC2TC(COMMA, TC_COMMA)
+ initC2TC(COLON, TC_COLON)
+ initC2TC(BEGIN_OBJ, TC_BEGIN_OBJ)
+ initC2TC(END_OBJ, TC_END_OBJ)
+ initC2TC(BEGIN_LIST, TC_BEGIN_LIST)
+ initC2TC(END_LIST, TC_END_LIST)
+ initC2TC(STRING, TC_STRING)
+ initC2TC(STRING_ESC, TC_STRING_ESC)
+}
+
+// object instead of @SharedImmutable because there is mutual initialization in [initC2ESC]
+internal object EscapeCharMappings {
+ @JvmField
+ public val ESCAPE_2_CHAR = CharArray(ESC2C_MAX)
+
+ init {
+ for (i in 0x00..0x1f) {
+ initC2ESC(i, UNICODE_ESC)
+ }
+
+ initC2ESC(0x08, 'b')
+ initC2ESC(0x09, 't')
+ initC2ESC(0x0a, 'n')
+ initC2ESC(0x0c, 'f')
+ initC2ESC(0x0d, 'r')
+ initC2ESC('/', '/')
+ initC2ESC(STRING, STRING)
+ initC2ESC(STRING_ESC, STRING_ESC)
+ }
+
+ private fun initC2ESC(c: Int, esc: Char) {
+ if (esc != UNICODE_ESC) ESCAPE_2_CHAR[esc.toInt()] = c.toChar()
+ }
+
+ private fun initC2ESC(c: Char, esc: Char) = initC2ESC(c.toInt(), esc)
+}
+
+private fun ByteArray.initC2TC(c: Int, cl: Byte) {
+ this[c] = cl
+}
+
+private fun ByteArray.initC2TC(c: Char, cl: Byte) {
+ initC2TC(c.toInt(), cl)
+}
+
+internal fun charToTokenClass(c: Char) = if (c.toInt() < CTC_MAX) C2TC[c.toInt()] else TC_OTHER
+
+internal fun escapeToChar(c: Int): Char = if (c < ESC2C_MAX) ESCAPE_2_CHAR[c] else INVALID
+
+
+// Streaming JSON reader
+internal class JsonReader(private val source: String) {
+
+ @JvmField
+ var currentPosition: Int = 0 // position in source
+
+ @JvmField
+ var tokenClass: Byte = TC_EOF
+
+ public val isDone: Boolean get() = tokenClass == TC_EOF
+
+ public val canBeginValue: Boolean
+ get() = when (tokenClass) {
+ TC_BEGIN_LIST, TC_BEGIN_OBJ, TC_OTHER, TC_STRING, TC_NULL -> true
+ else -> false
+ }
+
+ // updated by nextToken
+ private var tokenPosition: Int = 0
+
+ // update by nextString/nextLiteral
+ private var offset = -1 // when offset >= 0 string is in source, otherwise in buf
+ private var length = 0 // length of string
+ private var buf = CharArray(16) // only used for strings with escapes
+
+ init {
+ nextToken()
+ }
+
+ internal inline fun requireTokenClass(expected: Byte, errorMessage: (Char) -> String) {
+ if (tokenClass != expected) fail(tokenPosition, errorMessage(tokenClass.toChar()))
+ }
+
+ fun takeString(): String {
+ if (tokenClass != TC_OTHER && tokenClass != TC_STRING) fail(tokenPosition, "Expected string or non-null literal")
+ val prevStr = if (offset < 0)
+ String(buf, 0, length) else
+ source.substring(offset, offset + length)
+ nextToken()
+ return prevStr
+ }
+
+ private fun append(ch: Char) {
+ if (length >= buf.size) buf = buf.copyOf(2 * buf.size)
+ buf[length++] = ch
+ }
+
+ // initializes buf usage upon the first encountered escaped char
+ private fun appendRange(source: String, fromIndex: Int, toIndex: Int) {
+ val addLen = toIndex - fromIndex
+ val oldLen = length
+ val newLen = oldLen + addLen
+ if (newLen > buf.size) buf = buf.copyOf(newLen.coerceAtLeast(2 * buf.size))
+ for (i in 0 until addLen) buf[oldLen + i] = source[fromIndex + i]
+ length += addLen
+ }
+
+ fun nextToken() {
+ val source = source
+ var curPos = currentPosition
+ val maxLen = source.length
+ while (true) {
+ if (curPos >= maxLen) {
+ tokenPosition = curPos
+ tokenClass = TC_EOF
+ return
+ }
+ val ch = source[curPos]
+ val tc = charToTokenClass(ch)
+ when (tc) {
+ TC_WS -> curPos++ // skip whitespace
+ TC_OTHER -> {
+ nextLiteral(source, curPos)
+ return
+ }
+ TC_STRING -> {
+ nextString(source, curPos)
+ return
+ }
+ else -> {
+ this.tokenPosition = curPos
+ this.tokenClass = tc
+ this.currentPosition = curPos + 1
+ return
+ }
+ }
+ }
+ }
+
+ private fun nextLiteral(source: String, startPos: Int) {
+ tokenPosition = startPos
+ offset = startPos
+ var curPos = startPos
+ val maxLen = source.length
+ while (true) {
+ curPos++
+ if (curPos >= maxLen || charToTokenClass(source[curPos]) != TC_OTHER) break
+ }
+ this.currentPosition = curPos
+ length = curPos - offset
+ tokenClass = if (rangeEquals(source, offset, length, NULL)) TC_NULL else TC_OTHER
+ }
+
+ private fun nextString(source: String, startPos: Int) {
+ tokenPosition = startPos
+ length = 0 // in buffer
+ var curPos = startPos + 1
+ var lastPos = curPos
+ val maxLen = source.length
+ parse@ while (true) {
+ if (curPos >= maxLen) fail(curPos, "Unexpected end in string")
+ if (source[curPos] == STRING) {
+ break@parse
+ } else if (source[curPos] == STRING_ESC) {
+ appendRange(source, lastPos, curPos)
+ val newPos = appendEsc(source, curPos + 1)
+ curPos = newPos
+ lastPos = newPos
+ } else {
+ curPos++
+ }
+ }
+ if (lastPos == startPos + 1) {
+ // there was no escaped chars
+ this.offset = lastPos
+ this.length = curPos - lastPos
+ } else {
+ // some escaped chars were there
+ appendRange(source, lastPos, curPos)
+ this.offset = -1
+ }
+ this.currentPosition = curPos + 1
+ tokenClass = TC_STRING
+ }
+
+ private fun appendEsc(source: String, startPos: Int): Int {
+ var curPos = startPos
+ require(curPos < source.length, curPos) { "Unexpected end after escape char" }
+ val curChar = source[curPos++]
+ if (curChar == UNICODE_ESC) {
+ curPos = appendHex(source, curPos)
+ } else {
+ val c = escapeToChar(curChar.toInt())
+ require(c != INVALID, curPos) { "Invalid escaped char '$curChar'" }
+ append(c)
+ }
+ return curPos
+ }
+
+ private fun appendHex(source: String, startPos: Int): Int {
+ var curPos = startPos
+ append(
+ ((fromHexChar(source, curPos++) shl 12) +
+ (fromHexChar(source, curPos++) shl 8) +
+ (fromHexChar(source, curPos++) shl 4) +
+ fromHexChar(source, curPos++)).toChar()
+ )
+ return curPos
+ }
+
+ fun skipElement() {
+ if (tokenClass != TC_BEGIN_OBJ && tokenClass != TC_BEGIN_LIST) {
+ nextToken()
+ return
+ }
+ val tokenStack = mutableListOf<Byte>()
+ do {
+ when (tokenClass) {
+ TC_BEGIN_LIST, TC_BEGIN_OBJ -> tokenStack.add(tokenClass)
+ TC_END_LIST -> {
+ if (tokenStack.last() != TC_BEGIN_LIST) throw JsonParsingException(currentPosition, "found ] instead of }")
+ tokenStack.removeAt(tokenStack.size - 1)
+ }
+ TC_END_OBJ -> {
+ if (tokenStack.last() != TC_BEGIN_OBJ) throw JsonParsingException(currentPosition, "found } instead of ]")
+ tokenStack.removeAt(tokenStack.size - 1)
+ }
+ }
+ nextToken()
+ } while (tokenStack.isNotEmpty())
+ }
+
+ override fun toString(): String {
+ return "JsonReader(source='$source', currentPosition=$currentPosition, tokenClass=$tokenClass, tokenPosition=$tokenPosition, offset=$offset)"
+ }
+}
+
+// Utility functions
+private fun fromHexChar(source: String, curPos: Int): Int {
+ require(curPos < source.length, curPos) { "Unexpected end in unicode escape" }
+ val curChar = source[curPos]
+ return when (curChar) {
+ in '0'..'9' -> curChar.toInt() - '0'.toInt()
+ in 'a'..'f' -> curChar.toInt() - 'a'.toInt() + 10
+ in 'A'..'F' -> curChar.toInt() - 'A'.toInt() + 10
+ else -> fail(curPos, "Invalid toHexChar char '$curChar' in unicode escape")
+ }
+}
+
+private fun rangeEquals(source: String, start: Int, length: Int, str: String): Boolean {
+ val n = str.length
+ if (length != n) return false
+ for (i in 0 until n) if (source[start + i] != str[i]) return false
+ return true
+}
+
+internal inline fun require(condition: Boolean, position: Int, msg: () -> String) {
+ if (!condition) fail(position, msg())
+}
+
+@Suppress("NOTHING_TO_INLINE")
+internal inline fun fail(position: Int, msg: String): Nothing {
+ throw JsonParsingException(position, msg)
+}