summaryrefslogtreecommitdiff
path: root/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt
blob: 37766d993aa39d24f0d435642a0f296fd27c0d84 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package kotlinx.serialization.json.internal

/**
 * Optimized version of StringBuilder that is specific to JSON-encoding.
 *
 * ## Implementation note
 *
 * In order to encode a single string, it should be processed symbol-per-symbol,
 * in order to detect and escape unicode symbols.
 *
 * Done naively, this drastically slows down string processing due to two factors:
 * * Symbol-by-symbol copying that does not leverage optimized array copying
 * * A lot of range and flag checks due to Java's compact strings
 *
 * The following technique is used:
 * 1) Instead of storing intermediate result in `StringBuilder`, we store it in
 *    `CharArray` directly, skipping compact strings checks in `StringBuilder`
 * 2) Instead of copying symbols one-by-one, we optimistically copy them in a batch using
 *    the optimized and intrinsified `string.toCharArray(destination)`.
 *    It copies the content up to 8 times faster.
 *    Then we iterate over the char array and execute a single check on
 *    each character, which is easily unrolled and vectorized by the inliner.
 *    If an escape character is found, we fall back to per-symbol processing.
 *
 * 3) We pool char arrays in order to avoid excess resizes, allocations,
 *    and zeroing-out of arrays.
 */
internal actual open class JsonStringBuilder(@JvmField protected var array: CharArray) {
    /** Creates a builder whose initial buffer is taken from [CharArrayPool]. */
    actual constructor(): this(CharArrayPool.take())

    /** Number of leading chars of [array] that hold already written content. */
    protected var size = 0

    /** Appends the text produced by `value.toString()`. */
    actual fun append(value: Long) {
        // A hand-written conversion is possible, but it costs a lot of code and corner cases
        append(value.toString())
    }

    /** Appends a single char as-is; no escaping is applied. */
    actual fun append(ch: Char) {
        ensureAdditionalCapacity(1)
        array[size++] = ch
    }

    /** Appends [string] verbatim: no surrounding quotes, no escaping. */
    actual fun append(string: String) {
        val charCount = string.length
        ensureAdditionalCapacity(charCount)
        // Bulk copy straight into the buffer, starting at the current write position
        string.toCharArray(array, size, 0, charCount)
        size += charCount
    }

    /**
     * Appends [string] surrounded by double quotes.
     *
     * The content is first bulk-copied into the buffer and then scanned; as soon as a char
     * flagged in `ESCAPE_MARKERS` is met, the remainder is handed over to [appendStringSlowPath].
     */
    actual fun appendQuoted(string: String) {
        val charCount = string.length
        // Optimistic guess: the content plus the two quotes, i.e. nothing has to be escaped
        ensureAdditionalCapacity(charCount + 2)
        val buffer = array
        val openingQuoteAt = size
        buffer[openingQuoteAt] = '"'
        val contentStart = openingQuoteAt + 1
        string.toCharArray(buffer, contentStart, 0, charCount)
        val contentEnd = contentStart + charCount
        for (position in contentStart until contentEnd) {
            val code = buffer[position].code
            // Does this char require escaping?
            if (code < ESCAPE_MARKERS.size && ESCAPE_MARKERS[code] != 0.toByte()) {
                // Everything before `position` is already in place; continue on the slow path
                return appendStringSlowPath(position - contentStart, position, string)
            }
        }
        // Fast path completed: the capacity reserved at the top already covers the closing quote
        buffer[contentEnd] = '"'
        size = contentEnd + 1
    }

    /**
     * Finishes [appendQuoted] char by char, starting at the first char that needs escaping.
     *
     * @param firstEscapedChar index in [string] of the first char to process here
     * @param currentSize position in [array] at which the output for that char starts;
     *        the opening quote and all preceding chars are already stored before it
     * @param string the string being appended
     */
    private fun appendStringSlowPath(firstEscapedChar: Int, currentSize: Int, string: String) {
        var cursor = currentSize
        for (index in firstEscapedChar until string.length) {
            /*
             * We are already on the slow path, so the initial capacity guess no longer holds.
             * Reserve +2 on every iteration, enough for a backslash-escaped symbol.
             */
            cursor = ensureTotalCapacity(cursor, 2)
            val symbol = string[index]
            val code = symbol.code
            if (code >= ESCAPE_MARKERS.size) {
                // Outside of the marker table: copied through unchanged
                array[cursor++] = symbol
                continue
            }
            /*
             * Escape markers hold the escape letter for backslash-escaped symbols,
             * e.g. ESCAPE_MARKERS['\b'] == 'b'.toByte().
             * All other entries are either zero (no escaping) or one (unicode escape).
             */
            val marker = ESCAPE_MARKERS[code]
            when (marker) {
                0.toByte() -> {
                    array[cursor++] = symbol
                }
                1.toByte() -> {
                    // A marker of one is expected to come with an entry in ESCAPE_STRINGS, hence `!!`
                    val escaped = ESCAPE_STRINGS[code]!!
                    val escapedLength = escaped.length
                    cursor = ensureTotalCapacity(cursor, escapedLength)
                    escaped.toCharArray(array, cursor, 0, escapedLength)
                    cursor += escapedLength
                    size = cursor // Publish the size so that the next resize takes it into account
                }
                else -> {
                    array[cursor] = '\\'
                    array[cursor + 1] = marker.toInt().toChar()
                    cursor += 2
                    size = cursor // Publish the size so that the next resize takes it into account
                }
            }
        }
        // Closing quote
        cursor = ensureTotalCapacity(cursor, 1)
        array[cursor] = '"'
        size = cursor + 1
    }

    /** Returns the content written so far, i.e. the first [size] chars of [array], as a string. */
    actual override fun toString(): String = String(array, 0, size)

    /** Reserves room for [expected] more chars on top of the current [size]. */
    private fun ensureAdditionalCapacity(expected: Int) {
        ensureTotalCapacity(size, expected)
    }

    /**
     * Makes sure that [array] is strictly larger than `oldSize + additional`, replacing it
     * with a bigger copy when it is not. The copy is at least twice [oldSize] long.
     *
     * The old size is passed in and handed back explicitly so that callers can keep it in a
     * local instead of re-reading the [size] field; this implementation returns it unchanged.
     */
    protected open fun ensureTotalCapacity(oldSize: Int, additional: Int): Int {
        val required = oldSize + additional
        if (required >= array.size) {
            val doubled = oldSize * 2
            array = array.copyOf(if (required < doubled) doubled else required)
        }
        return oldSize
    }

    /** Hands the current buffer back to [CharArrayPool]. */
    actual open fun release() {
        CharArrayPool.release(array)
    }
}