aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Ulitzsch <vincent.ulitzsch@live.de>2019-12-13 20:07:39 +0100
committerjonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com>2019-12-13 11:07:39 -0800
commit7abdcd9b8fde67ac219713d467ce75ce8537c3d3 (patch)
treef3f3236dbaebf7129aacd324b6333b2f962f8e78
parentb03bc0945de65e7b811eab12a3680eca42640e35 (diff)
downloadoss-fuzz-7abdcd9b8fde67ac219713d467ce75ce8537c3d3.tar.gz
Add initial integration xerces-c (#3083)
-rwxr-xr-xprojects/xerces-c/Dockerfile21
-rwxr-xr-xprojects/xerces-c/build.sh41
-rwxr-xr-xprojects/xerces-c/parse_target.cpp28
-rw-r--r--projects/xerces-c/parse_target_proto.cpp45
-rwxr-xr-xprojects/xerces-c/project.yaml9
-rwxr-xr-xprojects/xerces-c/xerces_fuzz_common.cpp47
-rw-r--r--projects/xerces-c/xerces_fuzz_common.h23
-rwxr-xr-xprojects/xerces-c/xml.proto339
-rw-r--r--projects/xerces-c/xmlProtoConverter.cpp718
-rw-r--r--projects/xerces-c/xmlProtoConverter.h101
10 files changed, 1372 insertions, 0 deletions
diff --git a/projects/xerces-c/Dockerfile b/projects/xerces-c/Dockerfile
new file mode 100755
index 000000000..9df098e50
--- /dev/null
+++ b/projects/xerces-c/Dockerfile
@@ -0,0 +1,21 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+# OSS-Fuzz base builder image.
+FROM gcr.io/oss-fuzz-base/base-builder
+MAINTAINER vincent.ulitzsch@live.de
+# Tooling for the Xerces-C autotools build (sources fetched with subversion) and
+# for the cmake/ninja build of libprotobuf-mutator. Each package is listed once.
+RUN apt-get update && apt-get install -y make autoconf automake libtool wget zlib1g-dev ninja-build cmake subversion
+RUN svn co https://svn.apache.org/repos/asf/xerces/c/trunk $SRC/xerces-c
+RUN git clone --depth 1 https://github.com/google/libprotobuf-mutator.git
+# Build libprotobuf-mutator out of tree in ./LPM and let it download its own
+# protobuf; build.sh uses the protoc binary, headers and libraries produced here.
+RUN (mkdir LPM && cd LPM && cmake ../libprotobuf-mutator -GNinja -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON -DLIB_PROTO_MUTATOR_TESTING=OFF -DCMAKE_BUILD_TYPE=Release && ninja)
+COPY *.c *.options build.sh *.h *.cc *.cpp *.proto $SRC/
diff --git a/projects/xerces-c/build.sh b/projects/xerces-c/build.sh
new file mode 100755
index 000000000..bc02e982d
--- /dev/null
+++ b/projects/xerces-c/build.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+#https://github.com/linux-on-ibm-z/docs/wiki/Building-Xerces
+set -e
+
+# Build Xerces-C with its autotools flow.
+cd "$SRC/xerces-c"
+./reconf
+./configure
+# Limit parallelism to the number of available cores; a bare `make -j` places
+# no upper bound on the number of concurrent jobs.
+make -j"$(nproc)"
+
+cd "$SRC"
+
+# Byte-oriented target. $CXXFLAGS and $LIB_FUZZING_ENGINE stay unquoted on
+# purpose: they may hold several space-separated options.
+$CXX $CXXFLAGS $LIB_FUZZING_ENGINE -std=c++11 \
+    -I. -Ixerces-c/src \
+    xerces_fuzz_common.cpp parse_target.cpp -o "$OUT/parse_target" \
+    xerces-c/src/.libs/libxerces-c.a
+
+# The protobuf-based target is not built when compiling with MemorySanitizer.
+if [[ $CFLAGS != *sanitize=memory* ]]; then
+    # Regenerate the C++ sources for xml.proto with the protoc built alongside
+    # libprotobuf-mutator.
+    rm -rf genfiles && mkdir genfiles && LPM/external.protobuf/bin/protoc xml.proto --cpp_out=genfiles
+
+    $CXX $CXXFLAGS $LIB_FUZZING_ENGINE -std=c++11 \
+        -I. -I xerces-c/src -Ixerces-c/build/src genfiles/xml.pb.cc xmlProtoConverter.cpp xerces_fuzz_common.cpp parse_target_proto.cpp \
+        -I libprotobuf-mutator/ \
+        -I genfiles \
+        -I LPM/external.protobuf/include \
+        -o "$OUT/parse_target_proto" xerces-c/src/.libs/libxerces-c.a \
+        LPM/src/libfuzzer/libprotobuf-mutator-libfuzzer.a \
+        LPM/src/libprotobuf-mutator.a \
+        LPM/external.protobuf/lib/libprotobuf.a
+fi
diff --git a/projects/xerces-c/parse_target.cpp b/projects/xerces-c/parse_target.cpp
new file mode 100755
index 000000000..5e976765a
--- /dev/null
+++ b/projects/xerces-c/parse_target.cpp
@@ -0,0 +1,28 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+using namespace xercesc_3_2;
+
+// Fuzzer entry point for the byte-oriented target: forwards the raw input
+// buffer unchanged to parseInMemory() and always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ parseInMemory(Data, Size);
+ return 0;
+}
diff --git a/projects/xerces-c/parse_target_proto.cpp b/projects/xerces-c/parse_target_proto.cpp
new file mode 100644
index 000000000..b1fd33cbe
--- /dev/null
+++ b/projects/xerces-c/parse_target_proto.cpp
@@ -0,0 +1,45 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+#include "xmlProtoConverter.h"
+
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+#include "genfiles/xml.pb.h"
+
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+#include <iostream>
+
+namespace {
+    // File-local LogSilencer instance. It is never referenced again in this
+    // file; it exists only for whatever its constructor/destructor do.
+    // The unused `ignore` and `MakeUnique` helpers were removed: nothing in
+    // this translation unit called them, and MakeUnique depended on
+    // std::unique_ptr without <memory> being included here.
+    protobuf_mutator::protobuf::LogSilencer log_silencer;
+}
+
+using namespace xercesc_3_2;
+
+// Entry point of the protobuf-based target: renders the XmlDocument message
+// to XML text with a fresh ProtoConverter and parses that text in memory.
+DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) {
+    xmlProtoFuzzer::ProtoConverter converter;
+    const std::string xmlText = converter.protoToString(xmlDocument);
+    const uint8_t *bytes = (const uint8_t *)xmlText.c_str();
+    parseInMemory(bytes, xmlText.size());
+}
diff --git a/projects/xerces-c/project.yaml b/projects/xerces-c/project.yaml
new file mode 100755
index 000000000..41da7a725
--- /dev/null
+++ b/projects/xerces-c/project.yaml
@@ -0,0 +1,9 @@
+homepage: "https://xerces.apache.org/"
+primary_contact: "vincent.ulitzsch@live.de"
+auto_ccs:
+ - "vincent.ulitzsch@live.de"
+ - "bshas3@gmail.com"
+sanitizers:
+ - address
+ - memory
+ - undefined
diff --git a/projects/xerces-c/xerces_fuzz_common.cpp b/projects/xerces-c/xerces_fuzz_common.cpp
new file mode 100755
index 000000000..a76b383a8
--- /dev/null
+++ b/projects/xerces-c/xerces_fuzz_common.cpp
@@ -0,0 +1,47 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#include "xerces_fuzz_common.h"
+
+using namespace xercesc_3_2;
+static bool initialized = false;
+
+/// Parses a byte buffer as an XML document with a Xerces-C SAXParser.
+///
+/// XMLPlatformUtils::Initialize() is called once, on the first invocation;
+/// XMLPlatformUtils::Terminate() is intentionally never called from here.
+///
+/// @param Data pointer to the document bytes (handed to MemBufInputSource
+///             with its last constructor argument set to false)
+/// @param Size number of bytes readable at @p Data
+void parseInMemory(const uint8_t *Data, size_t Size)
+{
+    if (!initialized)
+    {
+        XMLPlatformUtils::Initialize();
+        initialized = true;
+    }
+
+    static const char *gMemBufId = "prodInfo";
+
+    // Both objects have automatic storage so they are released even if
+    // parse() throws (the previous new/delete pairs leaked in that case).
+    // The input source is declared first so the parser is destroyed before
+    // it, which keeps the original teardown order (parser, then source).
+    MemBufInputSource memBufIS(
+        reinterpret_cast<const XMLByte *>(Data), Size, gMemBufId, false);
+
+    SAXParser parser;
+    parser.setValidationScheme(SAXParser::Val_Auto);
+    parser.setDoNamespaces(false);
+    parser.setDoSchema(false);
+    parser.setHandleMultipleImports(true);
+    parser.setValidationSchemaFullChecking(false);
+
+    parser.parse(memBufIS);
+}
diff --git a/projects/xerces-c/xerces_fuzz_common.h b/projects/xerces-c/xerces_fuzz_common.h
new file mode 100644
index 000000000..9eaf88bd7
--- /dev/null
+++ b/projects/xerces-c/xerces_fuzz_common.h
@@ -0,0 +1,23 @@
+/*
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+*/
+#pragma once
+
+#include "xercesc/parsers/SAXParser.hpp"
+#include "xercesc/framework/MemBufInputSource.hpp"
+#include "xercesc/util/OutOfMemoryException.hpp"
+
+void parseInMemory(const uint8_t *Data, size_t Size); \ No newline at end of file
diff --git a/projects/xerces-c/xml.proto b/projects/xerces-c/xml.proto
new file mode 100755
index 000000000..75e54b6f0
--- /dev/null
+++ b/projects/xerces-c/xml.proto
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+// Either a comment or a processing instruction. ProtoConverter writes the
+// comment as "<!--text-->" and delegates the instruction to its own visitor.
+message Misc {
+ oneof misc_oneof {
+ string comment = 1;
+ ProcessingInstruction inst = 2;
+ }
+}
+
+// Named parameter-entity reference. Not referenced by any other message in
+// this file, and ProtoConverter declares no visit() overload for it.
+message PEReference {
+ string name = 1;
+}
+
+// Source for an "<!ELEMENT name spec>" declaration. `cdata` supplies the
+// alternatives of a MIXED spec or the comma-separated list of a CHILDREN spec;
+// FUZZ makes the converter emit the literal keyword "FUZZ".
+message ElementDecl {
+ enum ContentSpec {
+ EMPTY = 0;
+ ANY = 1;
+ FUZZ = 2;
+ MIXED = 3;
+ CHILDREN = 4;
+ }
+ string name = 1;
+ ContentSpec spec = 2;
+ repeated string cdata = 3;
+}
+
+// Attribute type wrapper used only by AttrListDecl below. ProtoConverter
+// declares no visit() overload for it (AttDef carries its own Type enum).
+message AttrType {
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ }
+ Type ty = 1;
+}
+
+// List of names; used only by AttrListDecl below. ProtoConverter declares no
+// visit() overload for it.
+message EnumeratedType {
+ repeated string names = 1;
+}
+
+// NOTE(review): distinct from AttListDecl (no "r") further down. No other
+// message references this one and ProtoConverter has no visit() overload for
+// it -- confirm whether it is still needed.
+message AttrListDecl {
+ string name = 1;
+ AttrType atype = 2;
+ EnumeratedType etype = 3;
+ DefaultDecl def = 4;
+}
+
+// External identifier. The converter writes SYSTEM "system",
+// PUBLIC "pub" "system", or FUZZ "pub" depending on `type`.
+message ExternalId {
+ enum Type {
+ SYSTEM = 0;
+ PUBLIC = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ string system = 2;
+ string pub = 3;
+}
+
+// Quoted attribute value built from references. Each entry of `value` is
+// written as <prefix><entry>; where the prefix is "&" (ENTITY), "&#" or "&#x"
+// (CHAR), or the literal text "fuzz" (FUZZ).
+message AttValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ repeated string value = 2;
+}
+
+// Attribute default declaration: #REQUIRED, #IMPLIED, #FIXED followed by
+// `att`, or the literal "#FUZZ". `att` is only emitted for FIXED.
+message DefaultDecl {
+ enum Type {
+ REQUIRED = 0;
+ IMPLIED = 1;
+ FIXED = 2;
+ FUZZ = 3;
+ }
+ Type type = 1;
+ AttValue att = 2;
+}
+
+// One attribute definition inside an ATTLIST: name, type keyword and default
+// declaration. FUZZ makes the converter emit the literal keyword "FUZZ".
+message AttDef {
+ // TODO: Add enumerated type
+ enum Type {
+ CDATA = 0;
+ ID = 1;
+ IDREF = 2;
+ IDREFS = 3;
+ ENTITY = 4;
+ ENTITIES = 5;
+ NMTOKEN = 6;
+ NMTOKENS = 7;
+ FUZZ = 8;
+ }
+ string name = 1;
+ Type type = 2;
+ DefaultDecl def = 3;
+}
+
+// Source for an "<!ATTLIST name attdef...>" declaration.
+message AttListDecl {
+ string name = 1;
+ repeated AttDef attdefs = 2;
+}
+
+// Source for an "<!NOTATION name ...>" declaration. The oneof selects an
+// external id, a "PUBLIC <pub>" form, or a "FUZZ <fuzz>" form.
+message NotationDecl {
+ string name = 1;
+ oneof notation_oneof {
+ ExternalId ext = 2;
+ string pub = 3;
+ string fuzz = 4;
+ }
+}
+
+// Quoted entity value built from references. Each entry of `name` is written
+// as <prefix><entry>; with prefix "&" (ENTITY), "&#" or "&#x" (CHAR),
+// "%" (PEREF) or the literal text "fuzz" (FUZZ).
+message EntityValue {
+ enum Type {
+ ENTITY = 0;
+ CHAR = 1;
+ PEREF = 2;
+ FUZZ = 3;
+ }
+ Type type = 1;
+ repeated string name = 2;
+}
+
+// Notation name for the " NDATA name" suffix of an external entity.
+message NDataDecl {
+ string name = 1;
+}
+
+// Definition of a general entity: an external id or a literal value.
+// `ndata` is only considered by the converter when `ext` is the chosen case.
+message EntityDef {
+ oneof entity_oneof {
+ ExternalId ext = 1;
+ EntityValue val = 2;
+ }
+ NDataDecl ndata = 3;
+}
+
+// Definition of a parameter entity: a literal value or an external id.
+message PEDef {
+ oneof pedef_oneof {
+ EntityValue val = 1;
+ ExternalId id = 2;
+ }
+}
+
+// Source for an "<!ENTITY ...>" declaration. GEDECL uses `ent`; PEDECL
+// writes a leading "% " and uses `pedef`.
+message EntityDecl {
+ enum Type {
+ GEDECL = 0;
+ PEDECL = 1;
+ }
+ Type type = 1;
+ string name = 2;
+ EntityDef ent = 3;
+ PEDef pedef = 4;
+}
+
+// Conditional section. INCLUDE and FUZZ wrap `ext`; IGNORE wraps the
+// sanitised `ignores` strings, each emitted as a nested "<![...]]>".
+message ConditionalSect {
+ enum Type {
+ INCLUDE = 0;
+ IGNORE = 1;
+ FUZZ = 2;
+ }
+ Type type = 1;
+ ExtSubsetDecl ext = 2;
+ // TODO: Make this recursive
+ // See https://www.w3.org/TR/xml/#NT-conditionalSect
+ repeated string ignores = 3;
+}
+
+// One entry of an external subset: a markup declaration or a conditional
+// section.
+message OneExtSubsetDecl {
+ oneof extsubset_oneof {
+ MarkupDecl m = 1;
+ ConditionalSect c = 2;
+ }
+}
+
+// Sequence of external-subset entries, emitted in order.
+message ExtSubsetDecl {
+ repeated OneExtSubsetDecl decls = 1;
+}
+
+// One markup declaration inside the DOCTYPE; exactly one alternative is
+// rendered, or nothing when the oneof is unset.
+message MarkupDecl {
+ oneof markup_oneof {
+ ElementDecl e = 1;
+ AttListDecl a = 2;
+ NotationDecl n = 3;
+ Misc m = 4;
+ EntityDecl entity = 5;
+ ExtSubsetDecl ext = 6;
+ }
+}
+
+// Source for "<!DOCTYPE name ext [mdecl...]>".
+message DocTypeDecl {
+ string name = 1;
+ ExternalId ext = 2;
+ repeated MarkupDecl mdecl = 3;
+}
+
+// Document prolog: XML declaration, DOCTYPE, then any number of Misc items,
+// rendered in that order.
+message Prolog {
+ XmlDeclaration decl = 1;
+ DocTypeDecl doctype = 2;
+ repeated Misc misc = 3;
+}
+
+// One attribute. For every type except FUZZ the converter uses a fixed
+// "xml:<name>" attribute name and only `value`; for FUZZ it uses `key`
+// (optionally prefixed with "xmlns:") together with `value`.
+message KeyValue {
+ enum XmlNamespace {
+ ATTRIBUTES = 0;
+ BASE = 1;
+ CATALOG = 2;
+ ID = 3;
+ LANG = 4;
+ LINK = 5;
+ SPACE = 6;
+ SPECIAL = 7;
+ TEST = 8;
+ FUZZ = 9;
+ }
+ XmlNamespace type = 1;
+ string key = 2;
+ string value = 3;
+}
+
+// Source for "<?name attr... ?>".
+message ProcessingInstruction {
+ string name = 1;
+ repeated KeyValue kv = 2;
+}
+
+// Payload of a "<![CDATA[...]]>" section (sanitised by the converter).
+message CData {
+ string data = 1;
+}
+
+// Element content: raw text, a nested element, or a CDATA section.
+message Content {
+ // TODO: Add other content types
+ oneof content_oneof {
+ string str = 1;
+ Element e = 2;
+ CData c = 3;
+ }
+}
+
+// One XML element. For PREDEFINED elements the converter additionally emits
+// a namespace attribute and a predefined child chosen by `id`, with
+// `childprop` as that child's attribute value; FUZZ elements get neither.
+message Element {
+ enum Type {
+ PREDEFINED = 0;
+ FUZZ = 1;
+ }
+ enum Id {
+ XIINCLUDE = 0;
+ XIFALLBACK = 1;
+ // Attributes of xinclude
+ XIHREF = 2;
+ XIPARSE = 3;
+ XIXPOINTER = 4;
+ XIENCODING = 5;
+ XIACCEPT = 6;
+ XIACCEPTLANG = 7;
+ }
+ Type type = 1;
+ Id id = 2;
+ string name = 3;
+ repeated KeyValue kv = 4;
+ Content content = 5;
+ string childprop = 6;
+}
+
+// XML version. STANDARD is rendered as "1.0"; FUZZ is rendered as
+// "<major>.<minor>" from the two integer fields.
+message VersionNum {
+ enum Type {
+ STANDARD = 0;
+ FUZZ = 1;
+ }
+ Type type = 1;
+ uint64 major = 2;
+ uint64 minor = 3;
+}
+
+// Value of the encoding="..." pseudo-attribute. Named enumerators map to a
+// fixed encoding label in the converter; FUZZ uses the sanitised `fuzz`
+// string instead.
+message Encodings {
+ enum Enc {
+ BIG5 = 0;
+ EUCJP = 1;
+ EUCKR = 2;
+ GB18030 = 3;
+ ISO2022JP = 4;
+ ISO2022KR = 5;
+ ISO88591 = 6;
+ ISO88592 = 7;
+ ISO88593 = 8;
+ ISO88594 = 9;
+ ISO88595 = 10;
+ ISO88596 = 11;
+ ISO88597 = 12;
+ ISO88598 = 13;
+ ISO88599 = 14;
+ SHIFTJIS = 15;
+ TIS620 = 16;
+ USASCII = 17;
+ UTF8 = 18;
+ UTF16 = 19;
+ UTF16BE = 20;
+ UTF16LE = 21;
+ WINDOWS31J = 22;
+ WINDOWS1255 = 23;
+ WINDOWS1256 = 24;
+ FUZZ = 25;
+ }
+ Enc name = 1;
+ string fuzz = 2;
+}
+
+// Source for the "<?xml version=... encoding=... standalone=...?>" line.
+// NOTE(review): the converter's visit(XmlDeclaration) does not read `fuzz`.
+message XmlDeclaration {
+ VersionNum ver = 1;
+ Encodings enc = 2;
+ enum Standalone {
+ YES = 0;
+ NO = 1;
+ }
+ Standalone standalone = 3;
+ string fuzz = 4;
+}
+
+// Root message handed to the proto fuzzer: a prolog followed by elements.
+message XmlDocument {
+ Prolog p = 1;
+ repeated Element e = 2;
+}
+
+package xmlProtoFuzzer; \ No newline at end of file
diff --git a/projects/xerces-c/xmlProtoConverter.cpp b/projects/xerces-c/xmlProtoConverter.cpp
new file mode 100644
index 000000000..b2caf67a2
--- /dev/null
+++ b/projects/xerces-c/xmlProtoConverter.cpp
@@ -0,0 +1,718 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "xmlProtoConverter.h"
+
+#include <algorithm>
+
+using namespace std;
+using namespace xmlProtoFuzzer;
+
+/// Removes every character that is not an ASCII letter or digit.
+/// @param _utf8 arbitrary input bytes (may contain non-ASCII data)
+/// @return the filtered string, or "fuzz" if nothing is left after filtering
+string ProtoConverter::removeNonAscii(string const& _utf8)
+{
+    // Explicit range checks instead of std::isalpha/std::isdigit: calling the
+    // <cctype> classifiers with a negative char (any byte >= 0x80 where char
+    // is signed) is undefined behaviour, and their result depends on the
+    // current locale.
+    auto const isNotAsciiAlnum = [](char c) -> bool {
+        bool const digit = c >= '0' && c <= '9';
+        bool const upper = c >= 'A' && c <= 'Z';
+        bool const lower = c >= 'a' && c <= 'z';
+        return !(digit || upper || lower);
+    };
+    string asciiStr{_utf8};
+    asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), isNotAsciiAlnum), asciiStr.end());
+    return asciiStr.empty() ? "fuzz" : asciiStr;
+}
+
+
+/// Emits a comment or a processing instruction, whichever the oneof holds.
+void ProtoConverter::visit(Misc const& _x)
+{
+    auto const which = _x.misc_oneof_case();
+    if (which == Misc::kComment)
+        m_output << "<!--" << _x.comment() << "-->\n";
+    else if (which == Misc::kInst)
+        visit(_x.inst());
+    // Unset oneof: nothing is written.
+}
+
+/// Emits the XML declaration, the DOCTYPE and then every Misc item in order.
+void ProtoConverter::visit(Prolog const& _x)
+{
+    visit(_x.decl());
+    visit(_x.doctype());
+    for (int i = 0; i < _x.misc_size(); ++i)
+        visit(_x.misc(i));
+}
+
+/// Emits one attribute followed by a space. Named types use a fixed xml:*
+/// attribute name; FUZZ uses the sanitised key, prefixed with "xmlns:" when
+/// the message's serialized size is odd.
+void ProtoConverter::visit(KeyValue const& _x)
+{
+    char const* opener = nullptr;
+    switch (_x.type())
+    {
+    case KeyValue::ATTRIBUTES: opener = "xml:attributes=\""; break;
+    case KeyValue::BASE:       opener = "xml:base=\""; break;
+    case KeyValue::CATALOG:    opener = "xml:catalog=\""; break;
+    case KeyValue::ID:         opener = "xml:id=\""; break;
+    case KeyValue::LANG:       opener = "xml:lang=\""; break;
+    case KeyValue::LINK:       opener = "xml:link=\""; break;
+    case KeyValue::SPACE:      opener = "xml:space=\""; break;
+    case KeyValue::SPECIAL:    opener = "xml:special=\""; break;
+    case KeyValue::TEST:       opener = "xml:test=\""; break;
+    case KeyValue::FUZZ:
+        if (_x.ByteSizeLong() % 2)
+            m_output << "xmlns:" << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" ";
+        else
+            m_output << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" ";
+        return;
+    default:
+        // Sentinel or unnamed enum value: nothing is written.
+        return;
+    }
+    m_output << opener << removeNonAscii(_x.value()) << "\" ";
+}
+
+/// Emits "<?name attr... ?>" followed by a newline; the name is sanitised.
+void ProtoConverter::visit(ProcessingInstruction const& _x)
+{
+    string const target = removeNonAscii(_x.name());
+    m_output << "<?" << target << " ";
+    for (int i = 0; i < _x.kv_size(); ++i)
+        visit(_x.kv(i));
+    m_output << "?>\n";
+}
+
+/// Emits the chosen content alternative followed by a newline; writes
+/// nothing at all when the oneof is unset.
+void ProtoConverter::visit(Content const& _x)
+{
+    switch (_x.content_oneof_case())
+    {
+    case Content::kStr:
+        m_output << _x.str();
+        break;
+    case Content::kE:
+        visit(_x.e());
+        break;
+    case Content::kC:
+        visit(_x.c());
+        break;
+    default:
+        return;
+    }
+    m_output << "\n";
+}
+
+/// Emits "<!ELEMENT name " followed by the content specification. For an
+/// unnamed spec value only the opening part is written.
+void ProtoConverter::visit(ElementDecl const& _x)
+{
+    m_output << "<!ELEMENT " << _x.name() << " ";
+
+    auto const spec = _x.spec();
+    if (spec == ElementDecl::EMPTY)
+        m_output << "EMPTY>";
+    else if (spec == ElementDecl::ANY)
+        m_output << "ANY>";
+    else if (spec == ElementDecl::FUZZ)
+        m_output << "FUZZ>";
+    else if (spec == ElementDecl::MIXED)
+    {
+        // (#PCDATA|a|b)* -- the trailing '*' only appears with alternatives.
+        m_output << "(#PCDATA";
+        for (int i = 0; i < _x.cdata_size(); ++i)
+            m_output << "|" << _x.cdata(i);
+        m_output << ")";
+        if (_x.cdata_size() > 0)
+            m_output << "*";
+        m_output << ">";
+    }
+    else if (spec == ElementDecl::CHILDREN)
+    {
+        // (a, b, c) with sanitised child names.
+        m_output << "(";
+        for (int i = 0; i < _x.cdata_size(); ++i)
+        {
+            if (i > 0)
+                m_output << ", ";
+            m_output << removeNonAscii(_x.cdata(i));
+        }
+        m_output << ")>";
+    }
+}
+
+/// Emits a double-quoted attribute value made of <prefix><name>; items, where
+/// the prefix depends on the value type and each name is sanitised.
+void ProtoConverter::visit(AttValue const& _x)
+{
+    string prefix;
+    auto const type = _x.type();
+    if (type == AttValue::ENTITY)
+        prefix = "&";
+    else if (type == AttValue::CHAR)
+        // TODO: Value that follows "&#x" must be a sequence of hex digits.
+        prefix = (_x.ByteSizeLong() % 2) ? "&#" : "&#x";
+    else if (type == AttValue::FUZZ)
+        prefix = "fuzz";
+
+    m_output << "\"";
+    for (int i = 0; i < _x.value_size(); ++i)
+        m_output << prefix << removeNonAscii(_x.value(i)) << ";";
+    m_output << "\"";
+}
+
+/// Emits the default declaration keyword; only #FIXED is followed by a value.
+void ProtoConverter::visit(DefaultDecl const& _x)
+{
+    auto const type = _x.type();
+    if (type == DefaultDecl::REQUIRED)
+        m_output << "#REQUIRED";
+    else if (type == DefaultDecl::IMPLIED)
+        m_output << "#IMPLIED";
+    else if (type == DefaultDecl::FIXED)
+    {
+        m_output << "#FIXED ";
+        visit(_x.att());
+    }
+    else if (type == DefaultDecl::FUZZ)
+        m_output << "#FUZZ";
+}
+
+/// Emits " name TYPE default" for one attribute definition. An unnamed type
+/// value contributes no keyword.
+void ProtoConverter::visit(AttDef const& _x)
+{
+    char const* keyword = nullptr;
+    switch (_x.type())
+    {
+    case AttDef::CDATA:    keyword = "CDATA "; break;
+    case AttDef::ID:       keyword = "ID "; break;
+    case AttDef::IDREF:    keyword = "IDREF "; break;
+    case AttDef::IDREFS:   keyword = "IDREFS "; break;
+    case AttDef::ENTITY:   keyword = "ENTITY "; break;
+    case AttDef::ENTITIES: keyword = "ENTITIES "; break;
+    case AttDef::NMTOKEN:  keyword = "NMTOKEN "; break;
+    case AttDef::NMTOKENS: keyword = "NMTOKENS "; break;
+    case AttDef::FUZZ:     keyword = "FUZZ "; break;
+    default:               break;
+    }
+
+    m_output << " " << removeNonAscii(_x.name()) << " ";
+    if (keyword != nullptr)
+        m_output << keyword;
+    visit(_x.def());
+}
+
+/// Emits "<!ATTLIST name" followed by every attribute definition and ">".
+void ProtoConverter::visit(AttListDecl const& _x)
+{
+    m_output << "<!ATTLIST " << removeNonAscii(_x.name());
+    for (int i = 0; i < _x.attdefs_size(); ++i)
+        visit(_x.attdefs(i));
+    m_output << ">";
+}
+
+/// Emits "<!NOTATION name ...>" where the middle part depends on the oneof;
+/// an unset oneof leaves it empty.
+void ProtoConverter::visit(NotationDecl const& _x)
+{
+    m_output << "<!NOTATION " << removeNonAscii(_x.name()) << " ";
+
+    auto const which = _x.notation_oneof_case();
+    if (which == NotationDecl::kExt)
+        visit(_x.ext());
+    else if (which == NotationDecl::kPub)
+        m_output << "PUBLIC " << _x.pub();
+    else if (which == NotationDecl::kFuzz)
+        m_output << "FUZZ " << _x.fuzz();
+
+    m_output << ">";
+}
+
+// Emits the " NDATA <name>" suffix. The name is written as-is, without
+// passing through removeNonAscii().
+void ProtoConverter::visit(NDataDecl const& _x)
+{
+ m_output << " NDATA " << _x.name();
+}
+
+/// Emits a general entity definition: a literal value, or an external id
+/// that is followed by the NDATA part when the serialized size is odd.
+void ProtoConverter::visit(EntityDef const& _x)
+{
+    auto const which = _x.entity_oneof_case();
+    if (which == EntityDef::kVal)
+    {
+        visit(_x.val());
+        return;
+    }
+    if (which != EntityDef::kExt)
+        return;
+
+    visit(_x.ext());
+    if (_x.ByteSizeLong() % 2)
+        visit(_x.ndata());
+}
+
+/// Emits a parameter entity definition: literal value or external id.
+void ProtoConverter::visit(PEDef const& _x)
+{
+    auto const which = _x.pedef_oneof_case();
+    if (which == PEDef::kVal)
+        visit(_x.val());
+    else if (which == PEDef::kId)
+        visit(_x.id());
+}
+
+/// Emits a double-quoted entity value made of <prefix><name>; items. Unlike
+/// attribute values, the names are written without sanitising.
+void ProtoConverter::visit(EntityValue const& _x)
+{
+    string prefix;
+    auto const type = _x.type();
+    if (type == EntityValue::ENTITY)
+        prefix = "&";
+    else if (type == EntityValue::CHAR)
+        prefix = (_x.ByteSizeLong() % 2) ? "&#" : "&#x";
+    else if (type == EntityValue::PEREF)
+        prefix = "%";
+    else if (type == EntityValue::FUZZ)
+        prefix = "fuzz";
+
+    m_output << "\"";
+    for (int i = 0; i < _x.name_size(); ++i)
+        m_output << prefix << _x.name(i) << ";";
+    m_output << "\"";
+}
+
+/// Emits "<!ENTITY ...>" for a general entity or, with a leading "% ", for
+/// a parameter entity. An unnamed type value yields "<!ENTITY >".
+void ProtoConverter::visit(EntityDecl const& _x)
+{
+    m_output << "<!ENTITY ";
+
+    auto const type = _x.type();
+    if (type == EntityDecl::GEDECL)
+    {
+        m_output << _x.name() << " ";
+        visit(_x.ent());
+    }
+    else if (type == EntityDecl::PEDECL)
+    {
+        m_output << "% " << _x.name() << " ";
+        visit(_x.pedef());
+    }
+
+    m_output << ">";
+}
+
+/// Emits a conditional section. INCLUDE and FUZZ wrap the external subset;
+/// IGNORE wraps the sanitised ignore strings as nested sections.
+void ProtoConverter::visit(ConditionalSect const& _x)
+{
+    auto const type = _x.type();
+
+    if (type == ConditionalSect::IGNORE)
+    {
+        m_output << "<![ IGNORE [";
+        for (int i = 0; i < _x.ignores_size(); ++i)
+            m_output << "<![" << removeNonAscii(_x.ignores(i)) << "]]>";
+        m_output << "]]>";
+        return;
+    }
+
+    char const* opener = nullptr;
+    if (type == ConditionalSect::INCLUDE)
+        opener = "<![ INCLUDE [";
+    else if (type == ConditionalSect::FUZZ)
+        opener = "<![ FUZZ [";
+    if (opener == nullptr)
+        return;
+
+    m_output << opener;
+    visit(_x.ext());
+    m_output << "]]>";
+}
+
+
+/// Dispatches to the markup declaration or conditional section visitor.
+void ProtoConverter::visit(OneExtSubsetDecl const& _x)
+{
+    auto const which = _x.extsubset_oneof_case();
+    if (which == OneExtSubsetDecl::kM)
+        visit(_x.m());
+    else if (which == OneExtSubsetDecl::kC)
+        visit(_x.c());
+}
+
+
+/// Emits every external-subset entry in order.
+void ProtoConverter::visit(ExtSubsetDecl const& _x)
+{
+    for (int i = 0; i < _x.decls_size(); ++i)
+        visit(_x.decls(i));
+}
+
+// Emits a CDATA section whose payload has been reduced by removeNonAscii().
+void ProtoConverter::visit(CData const& _x)
+{
+ m_output << "<![CDATA[" << removeNonAscii(_x.data()) << "]]>";
+}
+
+/// Dispatches to the visitor of whichever markup declaration is set; an
+/// unset oneof writes nothing.
+void ProtoConverter::visit(MarkupDecl const& _x)
+{
+    auto const which = _x.markup_oneof_case();
+    if (which == MarkupDecl::kE)
+        visit(_x.e());
+    else if (which == MarkupDecl::kA)
+        visit(_x.a());
+    else if (which == MarkupDecl::kN)
+        visit(_x.n());
+    else if (which == MarkupDecl::kM)
+        visit(_x.m());
+    else if (which == MarkupDecl::kEntity)
+        visit(_x.entity());
+    else if (which == MarkupDecl::kExt)
+        visit(_x.ext());
+}
+
+/// Returns predefined element from an Element_Id enum
+/// @param _x is an enum that holds the desired type of predefined value
+/// @param _prop is a string that holds the value of the desired type; it is
+/// sanitised with removeNonAscii() and ignored for ids with a fixed value
+/// @return string holding the predefined value of the form
+/// name attribute=\"value\"
+string ProtoConverter::getPredefined(Element_Id _x, string const& _prop)
+{
+    // Every case returns on its own. Letting the cases fall through to the
+    // end of the switch would make the final fallback overwrite each result,
+    // so that all ids yield the "xi:fuzz" element.
+    switch (_x)
+    {
+    case Element::XIINCLUDE:
+    case Element::XIFALLBACK:
+    case Element::XIHREF:
+        return "xi:include href=\"fuzz.xml\"";
+    case Element::XIPARSE:
+        return "xi:include parse=\"xml\"";
+    case Element::XIXPOINTER:
+        return "xi:include xpointer=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIENCODING:
+        return "xi:include encoding=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPT:
+        return "xi:include accept=\"" + removeNonAscii(_prop) + "\"";
+    case Element::XIACCEPTLANG:
+        return "xi:include accept-language=\"" + removeNonAscii(_prop) + "\"";
+    case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_:
+    case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_:
+        break;
+    }
+    // Sentinels and any value without a named enumerator.
+    return "xi:fuzz xifuzz=\"fuzz\"";
+}
+
+/// Returns uri string for a given Element_Id type
+/// @param _x predefined element id; every named id maps to the same value
+/// @return the XInclude namespace attribute text (s_XInclude)
+string ProtoConverter::getUri(Element_Id _x)
+{
+    // All named ids share the XInclude namespace. Returning unconditionally,
+    // rather than from inside a switch over the named enumerators, ensures a
+    // value is returned for any integer the enum field may carry, so control
+    // can never run off the end of this non-void function.
+    (void)_x;
+    return s_XInclude;
+}
+
+/// Emits one element:
+///   <name [uri] k1="v1" ...>
+///   [<predefined child/>]
+///   content
+///   </name>
+/// The uri attribute and predefined child only exist for PREDEFINED elements.
+void ProtoConverter::visit(Element const& _x)
+{
+    // Sanitised tag name, used for both the opening and the closing tag.
+    string const name = removeNonAscii(_x.name());
+
+    string child;
+    string pUri;
+    if (_x.type() == Element::PREDEFINED)
+    {
+        child = getPredefined(_x.id(), _x.childprop());
+        pUri = getUri(_x.id());
+    }
+
+    // Opening tag with optional namespace attribute and the attributes.
+    m_output << "<" << name << " ";
+    if (!pUri.empty())
+        m_output << pUri << " ";
+    for (int i = 0; i < _x.kv_size(); ++i)
+        visit(_x.kv(i));
+    m_output << ">\n";
+
+    // Optional predefined child, written as a self-closing element.
+    if (!child.empty())
+        m_output << "<" << child << "/>\n";
+
+    visit(_x.content());
+
+    m_output << "</" << name << ">\n";
+}
+
+/// Emits an external identifier with sanitised, double-quoted literals.
+void ProtoConverter::visit(ExternalId const& _x)
+{
+    auto const type = _x.type();
+    if (type == ExternalId::SYSTEM)
+    {
+        m_output << "SYSTEM " << "\"" << removeNonAscii(_x.system()) << "\"";
+    }
+    else if (type == ExternalId::PUBLIC)
+    {
+        m_output << "PUBLIC " << "\"" << removeNonAscii(_x.pub()) << "\"";
+        m_output << " " << "\"" << removeNonAscii(_x.system()) << "\"";
+    }
+    else if (type == ExternalId::FUZZ)
+    {
+        m_output << "FUZZ " << "\"" << removeNonAscii(_x.pub()) << "\"";
+    }
+}
+
+/// Emits "<!DOCTYPE name extid[markup declarations]>" and a newline.
+void ProtoConverter::visit(DocTypeDecl const& _x)
+{
+    m_output << "<!DOCTYPE " << removeNonAscii(_x.name()) << " ";
+    visit(_x.ext());
+
+    m_output << "[";
+    for (int i = 0; i < _x.mdecl_size(); ++i)
+        visit(_x.mdecl(i));
+    m_output << "]" << ">\n";
+}
+
+/// Emits the quoted version: "1.0" for STANDARD, "major.minor" for FUZZ and
+/// the sentinel values, and nothing for any other value.
+void ProtoConverter::visit(VersionNum const& _x)
+{
+    auto const type = _x.type();
+    if (type == VersionNum::STANDARD)
+    {
+        m_output << "\"1.0\"";
+        return;
+    }
+
+    bool const custom =
+        type == VersionNum::FUZZ ||
+        type == VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_ ||
+        type == VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_;
+    if (custom)
+        m_output << "\"" << _x.major() << "." << _x.minor() << "\"";
+}
+
+/// Emits ` encoding="<label>"`. Named encodings map to a fixed label, FUZZ
+/// uses the sanitised free-form string, anything else leaves the label empty.
+void ProtoConverter::visit(Encodings const& _x)
+{
+    string label;
+    switch (_x.name())
+    {
+    case Encodings::BIG5:        label = "BIG5"; break;
+    case Encodings::EUCJP:       label = "EUC-JP"; break;
+    case Encodings::EUCKR:       label = "EUC-KR"; break;
+    case Encodings::GB18030:     label = "GB18030"; break;
+    case Encodings::ISO2022JP:   label = "ISO-2022-JP"; break;
+    case Encodings::ISO2022KR:   label = "ISO-2022-KR"; break;
+    case Encodings::ISO88591:    label = "ISO-8859-1"; break;
+    case Encodings::ISO88592:    label = "ISO-8859-2"; break;
+    case Encodings::ISO88593:    label = "ISO-8859-3"; break;
+    case Encodings::ISO88594:    label = "ISO-8859-4"; break;
+    case Encodings::ISO88595:    label = "ISO-8859-5"; break;
+    case Encodings::ISO88596:    label = "ISO-8859-6"; break;
+    case Encodings::ISO88597:    label = "ISO-8859-7"; break;
+    case Encodings::ISO88598:    label = "ISO-8859-8"; break;
+    case Encodings::ISO88599:    label = "ISO-8859-9"; break;
+    case Encodings::SHIFTJIS:    label = "SHIFT_JIS"; break;
+    case Encodings::TIS620:      label = "TIS-620"; break;
+    case Encodings::USASCII:     label = "US-ASCII"; break;
+    case Encodings::UTF8:        label = "UTF-8"; break;
+    case Encodings::UTF16:       label = "UTF-16"; break;
+    case Encodings::UTF16BE:     label = "UTF-16BE"; break;
+    case Encodings::UTF16LE:     label = "UTF-16LE"; break;
+    case Encodings::WINDOWS31J:  label = "WINDOWS-31J"; break;
+    case Encodings::WINDOWS1255: label = "WINDOWS-1255"; break;
+    case Encodings::WINDOWS1256: label = "WINDOWS-1256"; break;
+    case Encodings::FUZZ:        label = removeNonAscii(_x.fuzz()); break;
+    default:                     break;
+    }
+    m_output << " encoding=\"" << label << "\"";
+}
+
+/// Emits the XML declaration line: version, encoding and, for the named
+/// standalone values, the standalone pseudo-attribute.
+void ProtoConverter::visit(XmlDeclaration const& _x)
+{
+    m_output << R"(<?xml version=)";
+    visit(_x.ver());
+    visit(_x.enc());
+
+    auto const standalone = _x.standalone();
+    if (standalone == XmlDeclaration::YES)
+        m_output << " standalone=\'yes\'";
+    else if (standalone == XmlDeclaration::NO)
+        m_output << " standalone=\'no\'";
+
+    m_output << "?>\n";
+}
+
+/// Emits the prolog followed by every top-level element in order.
+void ProtoConverter::visit(XmlDocument const& _x)
+{
+    visit(_x.p());
+    for (int i = 0; i < _x.e_size(); ++i)
+        visit(_x.e(i));
+}
+
+// Renders the whole document into m_output and returns the accumulated text.
+// m_output is not cleared here, so a second call on the same converter
+// returns the output of both calls concatenated.
+string ProtoConverter::protoToString(XmlDocument const& _x)
+{
+ visit(_x);
+ return m_output.str();
+} \ No newline at end of file
diff --git a/projects/xerces-c/xmlProtoConverter.h b/projects/xerces-c/xmlProtoConverter.h
new file mode 100644
index 000000000..a6333f1b3
--- /dev/null
+++ b/projects/xerces-c/xmlProtoConverter.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2019 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sstream>
+
+#include "xml.pb.h"
+
+namespace xmlProtoFuzzer {
+/// Converts an XmlDocument protobuf message into XML text by visiting each
+/// message type and appending its textual form to an internal stream.
+class ProtoConverter
+{
+public:
+    ProtoConverter() = default;
+    ProtoConverter(ProtoConverter const&) = delete;
+    ProtoConverter(ProtoConverter&&) = delete;
+
+    /// Renders the document and returns the text accumulated so far.
+    std::string protoToString(XmlDocument const&);
+
+private:
+    // --- Document level ---
+    void visit(XmlDocument const&);
+    void visit(Prolog const&);
+    void visit(XmlDeclaration const&);
+    void visit(VersionNum const&);
+    void visit(Encodings const&);
+    void visit(Misc const&);
+    void visit(ProcessingInstruction const&);
+
+    // --- DOCTYPE and markup declarations ---
+    void visit(DocTypeDecl const&);
+    void visit(ExternalId const&);
+    void visit(MarkupDecl const&);
+    void visit(ElementDecl const&);
+    void visit(AttListDecl const&);
+    void visit(AttDef const&);
+    void visit(DefaultDecl const&);
+    void visit(AttValue const&);
+    void visit(NotationDecl const&);
+    void visit(EntityDecl const&);
+    void visit(EntityDef const&);
+    void visit(EntityValue const&);
+    void visit(PEDef const&);
+    void visit(NDataDecl const&);
+    void visit(ExtSubsetDecl const&);
+    void visit(OneExtSubsetDecl const&);
+    void visit(ConditionalSect const&);
+
+    // --- Elements and their content ---
+    void visit(Element const&);
+    void visit(KeyValue const&);
+    void visit(Content const&);
+    void visit(CData const&);
+
+    // --- Helpers ---
+    /// Keeps only letters and digits; yields "fuzz" for an empty result.
+    std::string removeNonAscii(std::string const&);
+    /// Namespace attribute text for a predefined element id.
+    std::string getUri(Element_Id _x);
+    /// Predefined child element text for an element id and property value.
+    std::string getPredefined(Element_Id _x, std::string const&);
+
+    /// Accumulates the generated XML text.
+    std::ostringstream m_output;
+
+    /// Namespace declaration attached to predefined (XInclude) elements.
+    static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\"";
+};
+}
+