diff options
Diffstat (limited to 'markdown/serializers.py')
-rw-r--r-- | markdown/serializers.py | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/markdown/serializers.py b/markdown/serializers.py new file mode 100644 index 0000000..59bab18 --- /dev/null +++ b/markdown/serializers.py @@ -0,0 +1,189 @@ +# markdown/searializers.py +# +# Add x/html serialization to Elementree +# Taken from ElementTree 1.3 preview with slight modifications +# +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# https://www.pythonware.com/ +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2007 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + + +from xml.etree.ElementTree import ProcessingInstruction +from xml.etree.ElementTree import Comment, ElementTree, QName +import re + +__all__ = ['to_html_string', 'to_xhtml_string'] + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param") +RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I) + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: # pragma: no cover + pass + + +def _raise_serialization_error(text): # pragma: no cover + raise TypeError( + "cannot serialize {!r} (type {})".format(text, type(text).__name__) + ) + + +def _escape_cdata(text): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib(text): + # escape attribute value + try: + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib_html(text): + # escape attribute value + try: + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _serialize_html(write, elem, format): + tag = elem.tag + text = elem.text + if tag is Comment: + write("<!--%s-->" % _escape_cdata(text)) + elif tag is ProcessingInstruction: + write("<?%s?>" % _escape_cdata(text)) + elif tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, format) + else: + namespace_uri = None + if isinstance(tag, QName): + # QNAME objects store their data as a string: `{uri}tag` + if tag.text[:1] == "{": + namespace_uri, tag = tag.text[1:].split("}", 1) + else: + raise ValueError('QName objects must define a tag.') + write("<" + tag) + items = elem.items() + if items: + items = sorted(items) # lexical order + for k, v in items: + if isinstance(k, QName): + # Assume a text only QName + k = k.text + if isinstance(v, QName): + # Assume a text only QName + v = v.text + else: + v = _escape_attrib_html(v) + if k == v and format == 'html': + # handle boolean attributes + write(" %s" % v) + else: + write(' {}="{}"'.format(k, v)) + if namespace_uri: + write(' xmlns="%s"' % (_escape_attrib(namespace_uri))) + if format == "xhtml" and tag.lower() in HTML_EMPTY: + write(" />") + else: + write(">") + if text: + if tag.lower() in ["script", "style"]: + write(text) + else: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, format) + if tag.lower() not in HTML_EMPTY: + write("</" + tag + ">") + if elem.tail: + write(_escape_cdata(elem.tail)) + + +def _write_html(root, format="html"): + assert root is not None + data = [] + write = data.append + _serialize_html(write, root, format) + return "".join(data) + + +# -------------------------------------------------------------------- +# public functions + +def to_html_string(element): + return _write_html(ElementTree(element).getroot(), format="html") + + +def to_xhtml_string(element): + return _write_html(ElementTree(element).getroot(), format="xhtml") |