// Copyright 2021 Code Intelligence GmbH // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package test; import java.nio.charset.Charset; import java.util.ArrayList; final class ModifiedUtf8Encoder { // Encodes a string in the JVM's modified UTF-8 encoding. static public byte[] encode(String value) { // Modified UTF-8 is almost the same as CESU-8, the only difference being that the zero // character is coded on two bytes. byte[] cesuBytes = value.getBytes(Charset.forName("CESU-8")); ArrayList modifiedUtf8Bytes = new ArrayList<>(); for (byte cesuByte : cesuBytes) { if (cesuByte != 0) { modifiedUtf8Bytes.add(cesuByte); } else { modifiedUtf8Bytes.add((byte) 0xC0); modifiedUtf8Bytes.add((byte) 0x80); } } byte[] out = new byte[modifiedUtf8Bytes.size()]; for (int i = 0; i < modifiedUtf8Bytes.size(); i++) { out[i] = modifiedUtf8Bytes.get(i); } return out; } }