aboutsummaryrefslogtreecommitdiff
path: root/markdown/serializers.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/serializers.py')
-rw-r--r--markdown/serializers.py189
1 files changed, 189 insertions, 0 deletions
diff --git a/markdown/serializers.py b/markdown/serializers.py
new file mode 100644
index 0000000..59bab18
--- /dev/null
+++ b/markdown/serializers.py
@@ -0,0 +1,189 @@
+# markdown/searializers.py
+#
+# Add x/html serialization to Elementree
+# Taken from ElementTree 1.3 preview with slight modifications
+#
+# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# https://www.pythonware.com/
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2007 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+
+from xml.etree.ElementTree import ProcessingInstruction
+from xml.etree.ElementTree import Comment, ElementTree, QName
+import re
+
+__all__ = ['to_html_string', 'to_xhtml_string']
+
+HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
+ "img", "input", "isindex", "link", "meta", "param")
+RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)
+
+try:
+ HTML_EMPTY = set(HTML_EMPTY)
+except NameError: # pragma: no cover
+ pass
+
+
+def _raise_serialization_error(text): # pragma: no cover
+ raise TypeError(
+ "cannot serialize {!r} (type {})".format(text, type(text).__name__)
+ )
+
+
+def _escape_cdata(text):
+ # escape character data
+ try:
+ # it's worth avoiding do-nothing calls for strings that are
+ # shorter than 500 character, or so. assume that's, by far,
+ # the most common case in most applications.
+ if "&" in text:
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&', text)
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _escape_attrib(text):
+ # escape attribute value
+ try:
+ if "&" in text:
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ if "\"" in text:
+ text = text.replace("\"", "&quot;")
+ if "\n" in text:
+ text = text.replace("\n", "&#10;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _escape_attrib_html(text):
+ # escape attribute value
+ try:
+ if "&" in text:
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ if "\"" in text:
+ text = text.replace("\"", "&quot;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _serialize_html(write, elem, format):
+ tag = elem.tag
+ text = elem.text
+ if tag is Comment:
+ write("<!--%s-->" % _escape_cdata(text))
+ elif tag is ProcessingInstruction:
+ write("<?%s?>" % _escape_cdata(text))
+ elif tag is None:
+ if text:
+ write(_escape_cdata(text))
+ for e in elem:
+ _serialize_html(write, e, format)
+ else:
+ namespace_uri = None
+ if isinstance(tag, QName):
+ # QNAME objects store their data as a string: `{uri}tag`
+ if tag.text[:1] == "{":
+ namespace_uri, tag = tag.text[1:].split("}", 1)
+ else:
+ raise ValueError('QName objects must define a tag.')
+ write("<" + tag)
+ items = elem.items()
+ if items:
+ items = sorted(items) # lexical order
+ for k, v in items:
+ if isinstance(k, QName):
+ # Assume a text only QName
+ k = k.text
+ if isinstance(v, QName):
+ # Assume a text only QName
+ v = v.text
+ else:
+ v = _escape_attrib_html(v)
+ if k == v and format == 'html':
+ # handle boolean attributes
+ write(" %s" % v)
+ else:
+ write(' {}="{}"'.format(k, v))
+ if namespace_uri:
+ write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
+ if format == "xhtml" and tag.lower() in HTML_EMPTY:
+ write(" />")
+ else:
+ write(">")
+ if text:
+ if tag.lower() in ["script", "style"]:
+ write(text)
+ else:
+ write(_escape_cdata(text))
+ for e in elem:
+ _serialize_html(write, e, format)
+ if tag.lower() not in HTML_EMPTY:
+ write("</" + tag + ">")
+ if elem.tail:
+ write(_escape_cdata(elem.tail))
+
+
+def _write_html(root, format="html"):
+ assert root is not None
+ data = []
+ write = data.append
+ _serialize_html(write, root, format)
+ return "".join(data)
+
+
+# --------------------------------------------------------------------
+# public functions
+
+def to_html_string(element):
+ return _write_html(ElementTree(element).getroot(), format="html")
+
+
+def to_xhtml_string(element):
+ return _write_html(ElementTree(element).getroot(), format="xhtml")