diff options
author | Vincent Ulitzsch <vincent.ulitzsch@live.de> | 2019-12-13 20:07:39 +0100 |
---|---|---|
committer | jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> | 2019-12-13 11:07:39 -0800 |
commit | 7abdcd9b8fde67ac219713d467ce75ce8537c3d3 (patch) | |
tree | f3f3236dbaebf7129aacd324b6333b2f962f8e78 | |
parent | b03bc0945de65e7b811eab12a3680eca42640e35 (diff) | |
download | oss-fuzz-7abdcd9b8fde67ac219713d467ce75ce8537c3d3.tar.gz |
Add initial integration xerces-c (#3083)
-rwxr-xr-x | projects/xerces-c/Dockerfile | 21 | ||||
-rwxr-xr-x | projects/xerces-c/build.sh | 41 | ||||
-rwxr-xr-x | projects/xerces-c/parse_target.cpp | 28 | ||||
-rw-r--r-- | projects/xerces-c/parse_target_proto.cpp | 45 | ||||
-rwxr-xr-x | projects/xerces-c/project.yaml | 9 | ||||
-rwxr-xr-x | projects/xerces-c/xerces_fuzz_common.cpp | 47 | ||||
-rw-r--r-- | projects/xerces-c/xerces_fuzz_common.h | 23 | ||||
-rwxr-xr-x | projects/xerces-c/xml.proto | 339 | ||||
-rw-r--r-- | projects/xerces-c/xmlProtoConverter.cpp | 718 | ||||
-rw-r--r-- | projects/xerces-c/xmlProtoConverter.h | 101 |
10 files changed, 1372 insertions, 0 deletions
diff --git a/projects/xerces-c/Dockerfile b/projects/xerces-c/Dockerfile new file mode 100755 index 000000000..9df098e50 --- /dev/null +++ b/projects/xerces-c/Dockerfile @@ -0,0 +1,21 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +FROM gcr.io/oss-fuzz-base/base-builder +MAINTAINER vincent.ulitzsch@live.de +RUN apt-get update && apt-get install -y make autoconf automake libtool wget zlib1g-dev libtool ninja-build cmake subversion +RUN svn co https://svn.apache.org/repos/asf/xerces/c/trunk $SRC/xerces-c +RUN git clone --depth 1 https://github.com/google/libprotobuf-mutator.git +RUN (mkdir LPM && cd LPM && cmake ../libprotobuf-mutator -GNinja -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON -DLIB_PROTO_MUTATOR_TESTING=OFF -DCMAKE_BUILD_TYPE=Release && ninja) +COPY *.c *.options build.sh *.h *.cc *.cpp *.proto $SRC/ diff --git a/projects/xerces-c/build.sh b/projects/xerces-c/build.sh new file mode 100755 index 000000000..bc02e982d --- /dev/null +++ b/projects/xerces-c/build.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +#https://github.com/linux-on-ibm-z/docs/wiki/Building-Xerces +set -e +cd $SRC/xerces-c +./reconf +./configure +# Cap parallel jobs at the CPU count: a bare -j puts no limit on the number of jobs make starts. +make -j$(nproc) + +cd $SRC + +# Raw-bytes fuzz target: links the shared harness against the static Xerces library built above. +$CXX $CXXFLAGS $LIB_FUZZING_ENGINE -std=c++11 \ + -I. -Ixerces-c/src \ + xerces_fuzz_common.cpp parse_target.cpp -o $OUT/parse_target \ + xerces-c/src/.libs/libxerces-c.a + +# The protobuf-based target is built for every configuration except sanitize=memory. +if [[ $CFLAGS != *sanitize=memory* ]]; then + rm -rf genfiles && mkdir genfiles && LPM/external.protobuf/bin/protoc xml.proto --cpp_out=genfiles + + $CXX $CXXFLAGS $LIB_FUZZING_ENGINE -std=c++11 \ + -I. -I xerces-c/src -Ixerces-c/build/src genfiles/xml.pb.cc xmlProtoConverter.cpp xerces_fuzz_common.cpp parse_target_proto.cpp \ + -I libprotobuf-mutator/ \ + -I genfiles \ + -I LPM/external.protobuf/include \ + -o $OUT/parse_target_proto xerces-c/src/.libs/libxerces-c.a \ + LPM/src/libfuzzer/libprotobuf-mutator-libfuzzer.a \ + LPM/src/libprotobuf-mutator.a \ + LPM/external.protobuf/lib/libprotobuf.a +fi diff --git a/projects/xerces-c/parse_target.cpp b/projects/xerces-c/parse_target.cpp new file mode 100755 index 000000000..5e976765a --- /dev/null +++ b/projects/xerces-c/parse_target.cpp @@ -0,0 +1,28 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +*/ +#include "xerces_fuzz_common.h" + +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +using namespace xercesc_3_2; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + parseInMemory(Data, Size); + return 0; +} diff --git a/projects/xerces-c/parse_target_proto.cpp b/projects/xerces-c/parse_target_proto.cpp new file mode 100644 index 000000000..b1fd33cbe --- /dev/null +++ b/projects/xerces-c/parse_target_proto.cpp @@ -0,0 +1,45 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +*/ +#include "xerces_fuzz_common.h" +#include "xmlProtoConverter.h" + +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +#include "genfiles/xml.pb.h" + +#include "src/libfuzzer/libfuzzer_macro.h" + +#include <iostream> + +namespace { + protobuf_mutator::protobuf::LogSilencer log_silincer; + void ignore(void* ctx, const char* msg, ...) {} + + template <class T, class D> + std::unique_ptr<T, D> MakeUnique(T* obj, D del) { + return {obj, del}; + } +} + +using namespace xercesc_3_2; + +DEFINE_PROTO_FUZZER(const xmlProtoFuzzer::XmlDocument& xmlDocument) { + std::string xmlData = xmlProtoFuzzer::ProtoConverter().protoToString(xmlDocument); + parseInMemory((const uint8_t *)xmlData.c_str(), xmlData.size()); +} diff --git a/projects/xerces-c/project.yaml b/projects/xerces-c/project.yaml new file mode 100755 index 000000000..41da7a725 --- /dev/null +++ b/projects/xerces-c/project.yaml @@ -0,0 +1,9 @@ +homepage: "https://xerces.apache.org/" +primary_contact: "vincent.ulitzsch@live.de" +auto_ccs: + - "vincent.ulitzsch@live.de" + - "bshas3@gmail.com" +sanitizers: + - address + - memory + - undefined diff --git a/projects/xerces-c/xerces_fuzz_common.cpp b/projects/xerces-c/xerces_fuzz_common.cpp new file mode 100755 index 000000000..a76b383a8 --- /dev/null +++ b/projects/xerces-c/xerces_fuzz_common.cpp @@ -0,0 +1,47 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +*/ +#include "xerces_fuzz_common.h" + +using namespace xercesc_3_2; +static bool initialized = false; + +/// Parses Size bytes starting at Data as one XML document with a SAXParser. +/// @param Data start of the input bytes; not modified +/// @param Size number of bytes readable at Data +void parseInMemory(const uint8_t *Data, size_t Size) +{ + // XMLPlatformUtils::Initialize() runs on the first call only. + if (!initialized) + { + XMLPlatformUtils::Initialize(); + initialized = true; + } + static const char *gMemBufId = "prodInfo"; + + // Automatic storage instead of new/delete: both objects are released even + // when parse() exits by exception. The input source is declared first so + // the parser is still destroyed before it, as in the original delete order. + // The trailing false is MemBufInputSource's adoptBuffer argument (per the + // Xerces-C++ API): Data stays owned by the caller. + MemBufInputSource memBufIS( + reinterpret_cast<const XMLByte *>(Data), Size, gMemBufId, false); + SAXParser parser; + parser.setValidationScheme(SAXParser::Val_Auto); + parser.setDoNamespaces(false); + parser.setDoSchema(false); + parser.setHandleMultipleImports(true); + parser.setValidationSchemaFullChecking(false); + parser.parse(memBufIS); + // Kept disabled: initialized is never reset, so terminating here would + // leave later calls running without an initialized runtime. + //XMLPlatformUtils::Terminate(); +} diff --git a/projects/xerces-c/xerces_fuzz_common.h b/projects/xerces-c/xerces_fuzz_common.h new file mode 100644 index 000000000..9eaf88bd7 --- /dev/null +++ b/projects/xerces-c/xerces_fuzz_common.h @@ -0,0 +1,23 @@ +/* +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +*/ +#pragma once + +// Declare the unqualified size_t / uint8_t used in the prototype below, so the +// header does not depend on what the Xerces headers happen to include. +#include <stddef.h> +#include <stdint.h> + +#include "xercesc/parsers/SAXParser.hpp" +#include "xercesc/framework/MemBufInputSource.hpp" +#include "xercesc/util/OutOfMemoryException.hpp" + +/// Parses Size bytes starting at Data as one XML document; shared by the fuzz targets. +void parseInMemory(const uint8_t *Data, size_t Size);
\ No newline at end of file diff --git a/projects/xerces-c/xml.proto b/projects/xerces-c/xml.proto new file mode 100755 index 000000000..75e54b6f0 --- /dev/null +++ b/projects/xerces-c/xml.proto @@ -0,0 +1,339 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +message Misc { + oneof misc_oneof { + string comment = 1; + ProcessingInstruction inst = 2; + } +} + +message PEReference { + string name = 1; +} + +message ElementDecl { + enum ContentSpec { + EMPTY = 0; + ANY = 1; + FUZZ = 2; + MIXED = 3; + CHILDREN = 4; + } + string name = 1; + ContentSpec spec = 2; + repeated string cdata = 3; +} + +message AttrType { + enum Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + } + Type ty = 1; +} + +message EnumeratedType { + repeated string names = 1; +} + +message AttrListDecl { + string name = 1; + AttrType atype = 2; + EnumeratedType etype = 3; + DefaultDecl def = 4; +} + +message ExternalId { + enum Type { + SYSTEM = 0; + PUBLIC = 1; + FUZZ = 2; + } + Type type = 1; + string system = 2; + string pub = 3; +} + +message AttValue { + enum Type { + ENTITY = 0; + CHAR = 1; + FUZZ = 2; + } + Type type = 1; + repeated string value = 2; +} + +message DefaultDecl { + enum Type { + REQUIRED = 0; + IMPLIED = 1; + FIXED = 2; + FUZZ = 3; + } + Type type = 1; + AttValue att = 2; +} + +message AttDef { + // TODO: Add enumerated type + enum 
Type { + CDATA = 0; + ID = 1; + IDREF = 2; + IDREFS = 3; + ENTITY = 4; + ENTITIES = 5; + NMTOKEN = 6; + NMTOKENS = 7; + FUZZ = 8; + } + string name = 1; + Type type = 2; + DefaultDecl def = 3; +} + +message AttListDecl { + string name = 1; + repeated AttDef attdefs = 2; +} + +message NotationDecl { + string name = 1; + oneof notation_oneof { + ExternalId ext = 2; + string pub = 3; + string fuzz = 4; + } +} + +message EntityValue { + enum Type { + ENTITY = 0; + CHAR = 1; + PEREF = 2; + FUZZ = 3; + } + Type type = 1; + repeated string name = 2; +} + +message NDataDecl { + string name = 1; +} + +message EntityDef { + oneof entity_oneof { + ExternalId ext = 1; + EntityValue val = 2; + } + NDataDecl ndata = 3; +} + +message PEDef { + oneof pedef_oneof { + EntityValue val = 1; + ExternalId id = 2; + } +} + +message EntityDecl { + enum Type { + GEDECL = 0; + PEDECL = 1; + } + Type type = 1; + string name = 2; + EntityDef ent = 3; + PEDef pedef = 4; +} + +message ConditionalSect { + enum Type { + INCLUDE = 0; + IGNORE = 1; + FUZZ = 2; + } + Type type = 1; + ExtSubsetDecl ext = 2; + // TODO: Make this recursive + // See https://www.w3.org/TR/xml/#NT-conditionalSect + repeated string ignores = 3; +} + +message OneExtSubsetDecl { + oneof extsubset_oneof { + MarkupDecl m = 1; + ConditionalSect c = 2; + } +} + +message ExtSubsetDecl { + repeated OneExtSubsetDecl decls = 1; +} + +message MarkupDecl { + oneof markup_oneof { + ElementDecl e = 1; + AttListDecl a = 2; + NotationDecl n = 3; + Misc m = 4; + EntityDecl entity = 5; + ExtSubsetDecl ext = 6; + } +} + +message DocTypeDecl { + string name = 1; + ExternalId ext = 2; + repeated MarkupDecl mdecl = 3; +} + +message Prolog { + XmlDeclaration decl = 1; + DocTypeDecl doctype = 2; + repeated Misc misc = 3; +} + +message KeyValue { + enum XmlNamespace { + ATTRIBUTES = 0; + BASE = 1; + CATALOG = 2; + ID = 3; + LANG = 4; + LINK = 5; + SPACE = 6; + SPECIAL = 7; + TEST = 8; + FUZZ = 9; + } + XmlNamespace type = 1; + string key = 2; + 
string value = 3; +} + +message ProcessingInstruction { + string name = 1; + repeated KeyValue kv = 2; +} + +message CData { + string data = 1; +} + +message Content { + // TODO: Add other content types + oneof content_oneof { + string str = 1; + Element e = 2; + CData c = 3; + } +} + +message Element { + enum Type { + PREDEFINED = 0; + FUZZ = 1; + } + enum Id { + XIINCLUDE = 0; + XIFALLBACK = 1; + // Attributes of xinclude + XIHREF = 2; + XIPARSE = 3; + XIXPOINTER = 4; + XIENCODING = 5; + XIACCEPT = 6; + XIACCEPTLANG = 7; + } + Type type = 1; + Id id = 2; + string name = 3; + repeated KeyValue kv = 4; + Content content = 5; + string childprop = 6; +} + +message VersionNum { + enum Type { + STANDARD = 0; + FUZZ = 1; + } + Type type = 1; + uint64 major = 2; + uint64 minor = 3; +} + +message Encodings { + enum Enc { + BIG5 = 0; + EUCJP = 1; + EUCKR = 2; + GB18030 = 3; + ISO2022JP = 4; + ISO2022KR = 5; + ISO88591 = 6; + ISO88592 = 7; + ISO88593 = 8; + ISO88594 = 9; + ISO88595 = 10; + ISO88596 = 11; + ISO88597 = 12; + ISO88598 = 13; + ISO88599 = 14; + SHIFTJIS = 15; + TIS620 = 16; + USASCII = 17; + UTF8 = 18; + UTF16 = 19; + UTF16BE = 20; + UTF16LE = 21; + WINDOWS31J = 22; + WINDOWS1255 = 23; + WINDOWS1256 = 24; + FUZZ = 25; + } + Enc name = 1; + string fuzz = 2; +} + +message XmlDeclaration { + VersionNum ver = 1; + Encodings enc = 2; + enum Standalone { + YES = 0; + NO = 1; + } + Standalone standalone = 3; + string fuzz = 4; +} + +message XmlDocument { + Prolog p = 1; + repeated Element e = 2; +} + +package xmlProtoFuzzer;
\ No newline at end of file diff --git a/projects/xerces-c/xmlProtoConverter.cpp b/projects/xerces-c/xmlProtoConverter.cpp new file mode 100644 index 000000000..b2caf67a2 --- /dev/null +++ b/projects/xerces-c/xmlProtoConverter.cpp @@ -0,0 +1,718 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "xmlProtoConverter.h" + +#include <algorithm> + +using namespace std; +using namespace xmlProtoFuzzer; + +string ProtoConverter::removeNonAscii(string const& _utf8) +{ + string asciiStr{_utf8}; + asciiStr.erase(remove_if(asciiStr.begin(), asciiStr.end(), [=](char c) -> bool { + return !(std::isalpha(c) || std::isdigit(c)); + }), asciiStr.end()); + return asciiStr.empty() ? 
"fuzz" : asciiStr; +} + + +void ProtoConverter::visit(Misc const& _x) +{ + switch (_x.misc_oneof_case()) + { + case Misc::kComment: + m_output << "<!--" << _x.comment() << "-->\n"; + break; + case Misc::kInst: + visit(_x.inst()); + break; + case Misc::MISC_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(Prolog const& _x) +{ + visit(_x.decl()); + visit(_x.doctype()); + for (auto const& misc: _x.misc()) + visit(misc); +} + +void ProtoConverter::visit(KeyValue const& _x) +{ + switch (_x.type()) + { + case KeyValue::ATTRIBUTES: + m_output << "xml:attributes=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::BASE: + m_output << "xml:base=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::CATALOG: + m_output << "xml:catalog=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::ID: + m_output << "xml:id=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::LANG: + m_output << "xml:lang=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::LINK: + m_output << "xml:link=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::SPACE: + m_output << "xml:space=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::SPECIAL: + m_output << "xml:special=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::TEST: + m_output << "xml:test=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue::FUZZ: + if (_x.ByteSizeLong() % 2) + m_output << "xmlns:" << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; + else + m_output << removeNonAscii(_x.key()) << "=\"" << removeNonAscii(_x.value()) << "\" "; + break; + case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MIN_SENTINEL_DO_NOT_USE_: + case KeyValue_XmlNamespace_KeyValue_XmlNamespace_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(ProcessingInstruction const& _x) +{ + m_output << "<?" 
<< removeNonAscii(_x.name()) << " "; + for (auto const& prop: _x.kv()) + visit(prop); + m_output << "?>\n"; +} + +void ProtoConverter::visit(Content const& _x) +{ + switch (_x.content_oneof_case()) + { + case Content::kStr: + m_output << _x.str() << "\n"; + break; + case Content::kE: + visit(_x.e()); + m_output << "\n"; + break; + case Content::kC: + visit(_x.c()); + m_output << "\n"; + break; + case Content::CONTENT_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(ElementDecl const& _x) +{ + m_output << "<!ELEMENT " << _x.name() << " "; + switch (_x.spec()) + { + case ElementDecl::EMPTY: + m_output << "EMPTY>"; + break; + case ElementDecl::ANY: + m_output << "ANY>"; + break; + case ElementDecl::FUZZ: + m_output << "FUZZ>"; + break; + case ElementDecl::MIXED: + m_output << "(#PCDATA"; + for (auto const& pcdata: _x.cdata()) + m_output << "|" << pcdata; + m_output << ")"; + if (_x.cdata_size() > 0) + m_output << "*"; + m_output << ">"; + break; + case ElementDecl::CHILDREN: + { + m_output << "("; + string delim = ""; + for (auto const& str: _x.cdata()) { + m_output << delim << removeNonAscii(str); + delim = ", "; + } + m_output << ")>"; + break; + } + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MIN_SENTINEL_DO_NOT_USE_: + case ElementDecl_ContentSpec_ElementDecl_ContentSpec_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttValue const& _x) +{ + m_output << "\""; + string prefix; + switch (_x.type()) + { + case AttValue::ENTITY: + prefix = "&"; + break; + case AttValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + // TODO: Value that follows this must be a + // sequence of hex digits. 
+ prefix = "&#x"; + break; + case AttValue::FUZZ: + prefix = "fuzz"; + break; + case AttValue_Type_AttValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttValue_Type_AttValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& name: _x.value()) + m_output << prefix << removeNonAscii(name) << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(DefaultDecl const& _x) +{ + switch (_x.type()) + { + case DefaultDecl::REQUIRED: + m_output << "#REQUIRED"; + break; + case DefaultDecl::IMPLIED: + m_output << "#IMPLIED"; + break; + case DefaultDecl::FIXED: + m_output << "#FIXED "; + visit(_x.att()); + break; + case DefaultDecl::FUZZ: + m_output << "#FUZZ"; + break; + case DefaultDecl_Type_DefaultDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case DefaultDecl_Type_DefaultDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(AttDef const& _x) +{ + m_output << " " << removeNonAscii(_x.name()) << " "; + switch (_x.type()) + { + case AttDef::CDATA: + m_output << "CDATA "; + break; + case AttDef::ID: + m_output << "ID "; + break; + case AttDef::IDREF: + m_output << "IDREF "; + break; + case AttDef::IDREFS: + m_output << "IDREFS "; + break; + case AttDef::ENTITY: + m_output << "ENTITY "; + break; + case AttDef::ENTITIES: + m_output << "ENTITIES "; + break; + case AttDef::NMTOKEN: + m_output << "NMTOKEN "; + break; + case AttDef::NMTOKENS: + m_output << "NMTOKENS "; + break; + case AttDef::FUZZ: + m_output << "FUZZ "; + break; + case AttDef_Type_AttDef_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case AttDef_Type_AttDef_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + visit(_x.def()); +} + +void ProtoConverter::visit(AttListDecl const& _x) +{ + m_output << "<!ATTLIST " << removeNonAscii(_x.name()); + for (auto const& att: _x.attdefs()) + visit(att); + m_output << ">"; +} + +void ProtoConverter::visit(NotationDecl const& _x) +{ + m_output << "<!NOTATION " << removeNonAscii(_x.name()) << " "; + switch (_x.notation_oneof_case()) + { + case 
NotationDecl::kExt: + visit(_x.ext()); + break; + case NotationDecl::kPub: + m_output << "PUBLIC " << _x.pub(); + break; + case NotationDecl::kFuzz: + m_output << "FUZZ " << _x.fuzz(); + break; + case NotationDecl::NOTATION_ONEOF_NOT_SET: + break; + } + m_output << ">"; +} + +void ProtoConverter::visit(NDataDecl const& _x) +{ + m_output << " NDATA " << _x.name(); +} + +void ProtoConverter::visit(EntityDef const& _x) +{ + switch (_x.entity_oneof_case()) + { + case EntityDef::kExt: + visit(_x.ext()); + if (_x.ByteSizeLong() % 2) + visit(_x.ndata()); + break; + case EntityDef::kVal: + visit(_x.val()); + break; + case EntityDef::ENTITY_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(PEDef const& _x) +{ + switch (_x.pedef_oneof_case()) + { + case PEDef::kVal: + visit(_x.val()); + break; + case PEDef::kId: + visit(_x.id()); + break; + case PEDef::PEDEF_ONEOF_NOT_SET: + break; + } +} + +void ProtoConverter::visit(EntityValue const& _x) +{ + m_output << "\""; + string prefix; + switch (_x.type()) + { + case EntityValue::ENTITY: + prefix = "&"; + break; + case EntityValue::CHAR: + if (_x.ByteSizeLong() % 2) + prefix = "&#"; + else + prefix = "&#x"; + break; + case EntityValue::PEREF: + prefix = "%"; + break; + case EntityValue::FUZZ: + prefix = "fuzz"; + break; + case EntityValue_Type_EntityValue_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case EntityValue_Type_EntityValue_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + for (auto const& ref: _x.name()) + m_output << prefix << ref << ";"; + m_output << "\""; +} + +void ProtoConverter::visit(EntityDecl const& _x) +{ + m_output << "<!ENTITY "; + switch (_x.type()) + { + case EntityDecl::GEDECL: + m_output << _x.name() << " "; + visit(_x.ent()); + break; + case EntityDecl::PEDECL: + m_output << "% " << _x.name() << " "; + visit(_x.pedef()); + break; + case EntityDecl_Type_EntityDecl_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case EntityDecl_Type_EntityDecl_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + m_output << ">"; +} 
+ +void ProtoConverter::visit(ConditionalSect const& _x) +{ + switch (_x.type()) + { + case ConditionalSect::INCLUDE: + m_output << "<![ INCLUDE ["; + visit(_x.ext()); + m_output << "]]>"; + break; + case ConditionalSect::IGNORE: + m_output << "<![ IGNORE ["; + for (auto const& str: _x.ignores()) + m_output << "<![" << removeNonAscii(str) << "]]>"; + m_output << "]]>"; + break; + case ConditionalSect::FUZZ: + m_output << "<![ FUZZ ["; + visit(_x.ext()); + m_output << "]]>"; + break; + case ConditionalSect_Type_ConditionalSect_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case ConditionalSect_Type_ConditionalSect_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + + +void ProtoConverter::visit(OneExtSubsetDecl const& _x) +{ + switch (_x.extsubset_oneof_case()) + { + case OneExtSubsetDecl::kM: + visit(_x.m()); + break; + case OneExtSubsetDecl::kC: + visit(_x.c()); + break; + case OneExtSubsetDecl::EXTSUBSET_ONEOF_NOT_SET: + break; + } +} + + +void ProtoConverter::visit(ExtSubsetDecl const& _x) +{ + for (auto const& decl: _x.decls()) + visit(decl); +} + +void ProtoConverter::visit(CData const& _x) +{ + m_output << "<![CDATA[" << removeNonAscii(_x.data()) << "]]>"; +} + +void ProtoConverter::visit(MarkupDecl const& _x) +{ + switch (_x.markup_oneof_case()) + { + case MarkupDecl::kE: + visit(_x.e()); + break; + case MarkupDecl::kA: + visit(_x.a()); + break; + case MarkupDecl::kN: + visit(_x.n()); + break; + case MarkupDecl::kM: + visit(_x.m()); + break; + case MarkupDecl::kEntity: + visit(_x.entity()); + break; + case MarkupDecl::kExt: + visit(_x.ext()); + break; + case MarkupDecl::MARKUP_ONEOF_NOT_SET: + break; + } +} + +/// Returns predefined element from an Element_Id enum +/// @param _x is an enum that holds the desired type of predefined value +/// @param _prop is a string that holds the value of the desired type +/// @return string holding the predefined value of the form +/// name attribute=\"value\" +string ProtoConverter::getPredefined(Element_Id _x, string const& 
_prop) +{ + // Every case ends in break: without it control fell through to the final + // assignment, so each listed id produced "xi:fuzz xifuzz=\"fuzz\"". + string output{}; + switch (_x) + { + case Element::XIINCLUDE: + case Element::XIFALLBACK: + case Element::XIHREF: + output = "xi:include href=\"fuzz.xml\""; + break; + case Element::XIPARSE: + output = "xi:include parse=\"xml\""; + break; + case Element::XIXPOINTER: + output = "xi:include xpointer=\"" + removeNonAscii(_prop) + "\""; + break; + case Element::XIENCODING: + output = "xi:include encoding=\"" + removeNonAscii(_prop) + "\""; + break; + case Element::XIACCEPT: + output = "xi:include accept=\"" + removeNonAscii(_prop) + "\""; + break; + case Element::XIACCEPTLANG: + output = "xi:include accept-language=\"" + removeNonAscii(_prop) + "\""; + break; + case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_: + case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_: + output = "xi:fuzz xifuzz=\"fuzz\""; + break; + } + // Still empty when _x matched no case label. + return output; +} + +/// Returns uri string for a given Element_Id type +string ProtoConverter::getUri(Element_Id _x) +{ + switch (_x) + { + case Element::XIINCLUDE: + case Element::XIFALLBACK: + case Element::XIHREF: + case Element::XIPARSE: + case Element::XIXPOINTER: + case Element::XIENCODING: + case Element::XIACCEPT: + case Element::XIACCEPTLANG: + case Element_Id_Element_Id_INT_MIN_SENTINEL_DO_NOT_USE_: + case Element_Id_Element_Id_INT_MAX_SENTINEL_DO_NOT_USE_: + return s_XInclude; + } + // Reached only for an id that matches no case label; returning here keeps + // the function from flowing off its end without a value. + return s_XInclude; +} + +void ProtoConverter::visit(Element const& _x) +{ + // Predefined child node + string child = {}; + // Predefined uri for child node + string pUri = {}; + // Element name + string name = removeNonAscii(_x.name()); + + switch (_x.type()) + { + case Element::PREDEFINED: + child = getPredefined(_x.id(), _x.childprop()); + pUri = getUri(_x.id()); + break; + case Element::FUZZ: + case Element_Type_Element_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case Element_Type_Element_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + + // <name k1=v1 k2=v2 k3=v3> + // <content> + // </name> + + // Start name tag: Must be Ascii? 
+ m_output << "<" << name << " "; + + // Add uri to name tag + if (!pUri.empty()) + m_output << pUri << " "; + for (auto const& prop: _x.kv()) + visit(prop); + m_output << ">\n"; + + // Add attribute + if (!child.empty()) + m_output << "<" << child << "/>\n"; + + // Add content + visit(_x.content()); + + // Close name tag + m_output << "</" << name << ">\n"; +} + +void ProtoConverter::visit(ExternalId const& _x) +{ + switch (_x.type()) + { + case ExternalId::SYSTEM: + m_output << "SYSTEM " << "\"" << removeNonAscii(_x.system()) << "\""; + break; + case ExternalId::PUBLIC: + m_output << "PUBLIC " << "\"" << removeNonAscii(_x.pub()) << "\"" + << " " << "\"" << removeNonAscii(_x.system()) << "\""; + break; + case ExternalId::FUZZ: + m_output << "FUZZ " << "\"" << removeNonAscii(_x.pub()) << "\""; + break; + case ExternalId_Type_ExternalId_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case ExternalId_Type_ExternalId_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } +} + +void ProtoConverter::visit(DocTypeDecl const& _x) +{ + m_output << "<!DOCTYPE " << removeNonAscii(_x.name()) << " "; + visit(_x.ext()); + m_output << "["; + for (auto const& m: _x.mdecl()) + visit(m); + m_output << "]"; + m_output << ">\n"; +} + +void ProtoConverter::visit(VersionNum const& _x) +{ + switch (_x.type()) + { + case VersionNum::STANDARD: + m_output << "\"1.0\""; + break; + case VersionNum::FUZZ: + case VersionNum_Type_VersionNum_Type_INT_MIN_SENTINEL_DO_NOT_USE_: + case VersionNum_Type_VersionNum_Type_INT_MAX_SENTINEL_DO_NOT_USE_: + m_output << "\"" << _x.major() << "." 
<< _x.minor() << "\""; + break; + } +} + +void ProtoConverter::visit(Encodings const& _x) +{ + m_output << " encoding=\""; + switch (_x.name()) + { + case Encodings::BIG5: + m_output << "BIG5"; + break; + case Encodings::EUCJP: + m_output << "EUC-JP"; + break; + case Encodings::EUCKR: + m_output << "EUC-KR"; + break; + case Encodings::GB18030: + m_output << "GB18030"; + break; + case Encodings::ISO2022JP: + m_output << "ISO-2022-JP"; + break; + case Encodings::ISO2022KR: + m_output << "ISO-2022-KR"; + break; + case Encodings::ISO88591: + m_output << "ISO-8859-1"; + break; + case Encodings::ISO88592: + m_output << "ISO-8859-2"; + break; + case Encodings::ISO88593: + m_output << "ISO-8859-3"; + break; + case Encodings::ISO88594: + m_output << "ISO-8859-4"; + break; + case Encodings::ISO88595: + m_output << "ISO-8859-5"; + break; + case Encodings::ISO88596: + m_output << "ISO-8859-6"; + break; + case Encodings::ISO88597: + m_output << "ISO-8859-7"; + break; + case Encodings::ISO88598: + m_output << "ISO-8859-8"; + break; + case Encodings::ISO88599: + m_output << "ISO-8859-9"; + break; + case Encodings::SHIFTJIS: + m_output << "SHIFT_JIS"; + break; + case Encodings::TIS620: + m_output << "TIS-620"; + break; + case Encodings::USASCII: + m_output << "US-ASCII"; + break; + case Encodings::UTF8: + m_output << "UTF-8"; + break; + case Encodings::UTF16: + m_output << "UTF-16"; + break; + case Encodings::UTF16BE: + m_output << "UTF-16BE"; + break; + case Encodings::UTF16LE: + m_output << "UTF-16LE"; + break; + case Encodings::WINDOWS31J: + m_output << "WINDOWS-31J"; + break; + case Encodings::WINDOWS1255: + m_output << "WINDOWS-1255"; + break; + case Encodings::WINDOWS1256: + m_output << "WINDOWS-1256"; + break; + case Encodings::FUZZ: + m_output << removeNonAscii(_x.fuzz()); + break; + case Encodings_Enc_Encodings_Enc_INT_MIN_SENTINEL_DO_NOT_USE_: + case Encodings_Enc_Encodings_Enc_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + m_output << "\""; +} + +void 
ProtoConverter::visit(XmlDeclaration const& _x) +{ + m_output << R"(<?xml version=)"; + visit(_x.ver()); + visit(_x.enc()); + switch (_x.standalone()) + { + case XmlDeclaration::YES: + m_output << " standalone=\'yes\'"; + break; + case XmlDeclaration::NO: + m_output << " standalone=\'no\'"; + break; + case XmlDeclaration_Standalone_XmlDeclaration_Standalone_INT_MIN_SENTINEL_DO_NOT_USE_: + case XmlDeclaration_Standalone_XmlDeclaration_Standalone_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + m_output << "?>\n"; +} + +void ProtoConverter::visit(XmlDocument const& _x) +{ + visit(_x.p()); + for (auto const& element: _x.e()) + visit(element); +} + +string ProtoConverter::protoToString(XmlDocument const& _x) +{ + visit(_x); + return m_output.str(); +}
\ No newline at end of file diff --git a/projects/xerces-c/xmlProtoConverter.h b/projects/xerces-c/xmlProtoConverter.h new file mode 100644 index 000000000..a6333f1b3 --- /dev/null +++ b/projects/xerces-c/xmlProtoConverter.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2019 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <sstream> + +#include "xml.pb.h" + +namespace xmlProtoFuzzer { +class ProtoConverter +{ +public: + ProtoConverter() = default; + + ProtoConverter(ProtoConverter const&) = delete; + + ProtoConverter(ProtoConverter&&) = delete; + + std::string protoToString(XmlDocument const&); + +private: + void visit(Prolog const&); + + void visit(ProcessingInstruction const&); + + void visit(ExternalId const&); + + void visit(DocTypeDecl const&); + + void visit(VersionNum const&); + + void visit(Encodings const&); + + void visit(Misc const&); + + void visit(KeyValue const&); + + void visit(Element const&); + + void visit(ElementDecl const&); + + void visit(AttValue const&); + + void visit(DefaultDecl const&); + + void visit(AttDef const&); + + void visit(AttListDecl const&); + + void visit(NotationDecl const&); + + void visit(EntityDecl const&); + + void visit(EntityValue const&); + + void visit(EntityDef const&); + + void visit(PEDef const&); + + void visit(NDataDecl const&); + + void visit(ConditionalSect const&); + + void visit(OneExtSubsetDecl const&); + + void visit(ExtSubsetDecl const&); + + void 
visit(MarkupDecl const&); + + void visit(CData const&); + + void visit(Content const&); + + void visit(XmlDeclaration const&); + + void visit(XmlDocument const&); + + std::string removeNonAscii(std::string const&); + std::string getUri(Element_Id _x); + std::string getPredefined(Element_Id _x, std::string const&); + + std::ostringstream m_output; + + static constexpr auto s_XInclude = "xmlns:xi=\"http://www.w3.org/2001/XInclude\""; +}; +} + |