1 file changed, 15 insertions, 601 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index bd52113..d88b1e9 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -1,614 +1,28 @@
 """
 Python Markdown
-===============
 
-Python Markdown converts Markdown to HTML and can be used as a library or
-called from the command line.
+A Python implementation of John Gruber's Markdown.
 
-## Basic usage as a module:
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
 
-    import markdown
-    md = Markdown()
-    html = md.convert(your_text_string)
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
 
-## Basic use from the command line:
-
-    markdown source.txt > destination.html
-
-Run "markdown --help" to see more options.
-
-## Extensions
-
-See <http://www.freewisdom.org/projects/python-markdown/> for more
-information and instructions on how to extend the functionality of
-Python Markdown.  Read that before you try modifying this file.
-
-## Authors and License
-
-Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
-maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
-Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
-
-Contact: markdown@freewisdom.org
-
-Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
-Copyright 200? Django Software Foundation (OrderedDict implementation)
+Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
 Copyright 2004 Manfred Stienstra (the original version)
 
-License: BSD (see docs/LICENSE for details).
-"""
-
-version = "2.0.3"
-version_info = (2,0,3, "Final")
-
-import re
-import codecs
-import sys
-import warnings
-import logging
-from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
-
-
-"""
-CONSTANTS
-=============================================================================
-"""
-
-"""
-Constants you might want to modify
------------------------------------------------------------------------------
-"""
-
-# default logging level for command-line use
-COMMAND_LINE_LOGGING_LEVEL = CRITICAL
-TAB_LENGTH = 4               # expand tabs to this many spaces
-ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
-SMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
-DEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
-HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
-BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
-                                  "|script|noscript|form|fieldset|iframe|math"
-                                  "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
-                                  "|tr|th|td")
-DOC_TAG = "div"     # Element used to wrap document - later removed
-
-# Placeholders
-STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
-ETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
-INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
-INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
-AMP_SUBSTITUTE = STX+"amp"+ETX
-
-
-"""
-Constants you probably do not need to change
------------------------------------------------------------------------------
-"""
-
-RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
-                     # Hebrew (0590-05FF), Arabic (0600-06FF),
-                     # Syriac (0700-074F), Arabic supplement (0750-077F),
-                     # Thaana (0780-07BF), Nko (07C0-07FF).
-                    (u'\u2D30', u'\u2D7F'), # Tifinagh
-                    )
-
-
-"""
-AUXILIARY GLOBAL FUNCTIONS
-=============================================================================
-"""
-
-
-def message(level, text):
-    """ A wrapper method for logging debug messages. """
-    logger =  logging.getLogger('MARKDOWN')
-    if logger.handlers:
-        # The logger is configured
-        logger.log(level, text)
-        if level > WARN:
-            sys.exit(0)
-    elif level > WARN:
-        raise MarkdownException, text
-    else:
-        warnings.warn(text, MarkdownWarning)
-
-
-def isBlockLevel(tag):
-    """Check if the tag is a block level HTML tag."""
-    return BLOCK_LEVEL_ELEMENTS.match(tag)
-
-"""
-MISC AUXILIARY CLASSES
-=============================================================================
+License: BSD (see LICENSE.md for details).
 """
 
-class AtomicString(unicode):
-    """A string which should not be further processed."""
-    pass
-
-
-class MarkdownException(Exception):
-    """ A Markdown Exception. """
-    pass
-
-
-class MarkdownWarning(Warning):
-    """ A Markdown Warning. """
-    pass
-
-
-"""
-OVERALL DESIGN
-=============================================================================
-
-Markdown processing takes place in four steps:
-
-1. A bunch of "preprocessors" munge the input text.
-2. BlockParser() parses the high-level structural elements of the
-   pre-processed text into an ElementTree.
-3. A bunch of "treeprocessors" are run against the ElementTree. One such
-   treeprocessor runs InlinePatterns against the ElementTree, detecting inline
-   markup.
-4. Some post-processors are run against the text after the ElementTree has
-   been serialized into text.
-5. The output is written to a string.
-
-Those steps are put together by the Markdown() class.
-
-"""
-
-import preprocessors
-import blockprocessors
-import treeprocessors
-import inlinepatterns
-import postprocessors
-import blockparser
-import etree_loader
-import odict
-
-# Extensions should use "markdown.etree" instead of "etree" (or do `from
-# markdown import etree`).  Do not import it by yourself.
-
-etree = etree_loader.importETree()
-
-# Adds the ability to output html4
-import html4
-
-
-class Markdown:
-    """Convert Markdown to HTML."""
-
-    def __init__(self,
-                 extensions=[],
-                 extension_configs={},
-                 safe_mode = False, 
-                 output_format=DEFAULT_OUTPUT_FORMAT):
-        """
-        Creates a new Markdown instance.
-
-        Keyword arguments:
-
-        * extensions: A list of extensions.
-           If they are of type string, the module mdx_name.py will be loaded.
-           If they are a subclass of markdown.Extension, they will be used
-           as-is.
-        * extension-configs: Configuration setting for extensions.
-        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
-        * output_format: Format of output. Supported formats are:
-            * "xhtml1": Outputs XHTML 1.x. Default.
-            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
-            * "html4": Outputs HTML 4
-            * "html": Outputs latest supported version of HTML (currently HTML 4).
-            Note that it is suggested that the more specific formats ("xhtml1" 
-            and "html4") be used as "xhtml" or "html" may change in the future
-            if it makes sense at that time. 
-
-        """
-        
-        self.safeMode = safe_mode
-        self.registeredExtensions = []
-        self.docType = ""
-        self.stripTopLevelTags = True
-
-        # Preprocessors
-        self.preprocessors = odict.OrderedDict()
-        self.preprocessors["html_block"] = \
-                preprocessors.HtmlBlockPreprocessor(self)
-        self.preprocessors["reference"] = \
-                preprocessors.ReferencePreprocessor(self)
-        # footnote preprocessor will be inserted with "<reference"
-
-        # Block processors - ran by the parser
-        self.parser = blockparser.BlockParser()
-        self.parser.blockprocessors['empty'] = \
-                blockprocessors.EmptyBlockProcessor(self.parser)
-        self.parser.blockprocessors['indent'] = \
-                blockprocessors.ListIndentProcessor(self.parser)
-        self.parser.blockprocessors['code'] = \
-                blockprocessors.CodeBlockProcessor(self.parser)
-        self.parser.blockprocessors['hashheader'] = \
-                blockprocessors.HashHeaderProcessor(self.parser)
-        self.parser.blockprocessors['setextheader'] = \
-                blockprocessors.SetextHeaderProcessor(self.parser)
-        self.parser.blockprocessors['hr'] = \
-                blockprocessors.HRProcessor(self.parser)
-        self.parser.blockprocessors['olist'] = \
-                blockprocessors.OListProcessor(self.parser)
-        self.parser.blockprocessors['ulist'] = \
-                blockprocessors.UListProcessor(self.parser)
-        self.parser.blockprocessors['quote'] = \
-                blockprocessors.BlockQuoteProcessor(self.parser)
-        self.parser.blockprocessors['paragraph'] = \
-                blockprocessors.ParagraphProcessor(self.parser)
-
-
-        #self.prePatterns = []
-
-        # Inline patterns - Run on the tree
-        self.inlinePatterns = odict.OrderedDict()
-        self.inlinePatterns["backtick"] = \
-                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
-        self.inlinePatterns["escape"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
-        self.inlinePatterns["reference"] = \
-            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
-        self.inlinePatterns["link"] = \
-                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
-        self.inlinePatterns["image_link"] = \
-                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
-        self.inlinePatterns["image_reference"] = \
-            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
-        self.inlinePatterns["autolink"] = \
-            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
-        self.inlinePatterns["automail"] = \
-            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
-        self.inlinePatterns["linebreak2"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
-        self.inlinePatterns["linebreak"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
-        self.inlinePatterns["html"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
-        self.inlinePatterns["entity"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
-        self.inlinePatterns["not_strong"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
-        self.inlinePatterns["strong_em"] = \
-            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
-        self.inlinePatterns["strong"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
-        self.inlinePatterns["emphasis"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
-        self.inlinePatterns["emphasis2"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
-        # The order of the handlers matters!!!
-
-
-        # Tree processors - run once we have a basic parse.
-        self.treeprocessors = odict.OrderedDict()
-        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
-        self.treeprocessors["prettify"] = \
-                treeprocessors.PrettifyTreeprocessor(self)
-
-        # Postprocessors - finishing touches.
-        self.postprocessors = odict.OrderedDict()
-        self.postprocessors["raw_html"] = \
-                postprocessors.RawHtmlPostprocessor(self)
-        self.postprocessors["amp_substitute"] = \
-                postprocessors.AndSubstitutePostprocessor()
-        # footnote postprocessor will be inserted with ">amp_substitute"
-
-        # Map format keys to serializers
-        self.output_formats = {
-            'html'  : html4.to_html_string, 
-            'html4' : html4.to_html_string,
-            'xhtml' : etree.tostring, 
-            'xhtml1': etree.tostring,
-        }
-
-        self.references = {}
-        self.htmlStash = preprocessors.HtmlStash()
-        self.registerExtensions(extensions = extensions,
-                                configs = extension_configs)
-        self.set_output_format(output_format)
-        self.reset()
-
-    def registerExtensions(self, extensions, configs):
-        """
-        Register extensions with this instance of Markdown.
-
-        Keyword aurguments:
-
-        * extensions: A list of extensions, which can either
-           be strings or objects.  See the docstring on Markdown.
-        * configs: A dictionary mapping module names to config options.
-
-        """
-        for ext in extensions:
-            if isinstance(ext, basestring):
-                ext = load_extension(ext, configs.get(ext, []))
-            if isinstance(ext, Extension):
-                try:
-                    ext.extendMarkdown(self, globals())
-                except NotImplementedError, e:
-                    message(ERROR, e)
-            else:
-                message(ERROR, 'Extension "%s.%s" must be of type: "markdown.Extension".' \
-                    % (ext.__class__.__module__, ext.__class__.__name__))
-
-    def registerExtension(self, extension):
-        """ This gets called by the extension """
-        self.registeredExtensions.append(extension)
-
-    def reset(self):
-        """
-        Resets all state variables so that we can start with a new text.
-        """
-        self.htmlStash.reset()
-        self.references.clear()
-
-        for extension in self.registeredExtensions:
-            extension.reset()
-
-    def set_output_format(self, format):
-        """ Set the output format for the class instance. """
-        try:
-            self.serializer = self.output_formats[format.lower()]
-        except KeyError:
-            message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
-                               % (format, self.output_formats.keys()))
-
-    def convert(self, source):
-        """
-        Convert markdown to serialized XHTML or HTML.
-
-        Keyword arguments:
-
-        * source: Source text as a Unicode string.
-
-        """
-
-        # Fixup the source text
-        if not source.strip():
-            return u""  # a blank unicode string
-        try:
-            source = unicode(source)
-        except UnicodeDecodeError:
-            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
-            return u""
-
-        source = source.replace(STX, "").replace(ETX, "")
-        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
-        source = re.sub(r'\n\s+\n', '\n\n', source)
-        source = source.expandtabs(TAB_LENGTH)
-
-        # Split into lines and run the line preprocessors.
-        self.lines = source.split("\n")
-        for prep in self.preprocessors.values():
-            self.lines = prep.run(self.lines)
-
-        # Parse the high-level elements.
-        root = self.parser.parseDocument(self.lines).getroot()
-
-        # Run the tree-processors
-        for treeprocessor in self.treeprocessors.values():
-            newRoot = treeprocessor.run(root)
-            if newRoot:
-                root = newRoot
-
-        # Serialize _properly_.  Strip top-level tags.
-        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
-        if self.stripTopLevelTags:
-            try:
-                start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
-                end = output.rindex('</%s>'%DOC_TAG)
-                output = output[start:end].strip()
-            except ValueError:
-                if output.strip().endswith('<%s />'%DOC_TAG):
-                    # We have an empty document
-                    output = ''
-                else:
-                    # We have a serious problem
-                    message(CRITICAL, 'Failed to strip top level tags.')
-
-        # Run the text post-processors
-        for pp in self.postprocessors.values():
-            output = pp.run(output)
-
-        return output.strip()
-
-    def convertFile(self, input=None, output=None, encoding=None):
-        """Converts a markdown file and returns the HTML as a unicode string.
-
-        Decodes the file using the provided encoding (defaults to utf-8),
-        passes the file content to markdown, and outputs the html to either
-        the provided stream or the file with provided name, using the same
-        encoding as the source file.
-
-        **Note:** This is the only place that decoding and encoding of unicode
-        takes place in Python-Markdown.  (All other code is unicode-in /
-        unicode-out.)
-
-        Keyword arguments:
-
-        * input: Name of source text file.
-        * output: Name of output file. Writes to stdout if `None`.
-        * encoding: Encoding of input and output files. Defaults to utf-8.
-
-        """
-
-        encoding = encoding or "utf-8"
-
-        # Read the source
-        input_file = codecs.open(input, mode="r", encoding=encoding)
-        text = input_file.read()
-        input_file.close()
-        text = text.lstrip(u'\ufeff') # remove the byte-order mark
-
-        # Convert
-        html = self.convert(text)
-
-        # Write to file or stdout
-        if isinstance(output, (str, unicode)):
-            output_file = codecs.open(output, "w", encoding=encoding)
-            output_file.write(html)
-            output_file.close()
-        else:
-            output.write(html.encode(encoding))
-
-
-"""
-Extensions
------------------------------------------------------------------------------
-"""
-
-class Extension:
-    """ Base class for extensions to subclass. """
-    def __init__(self, configs = {}):
-        """Create an instance of an Extention.
-
-        Keyword arguments:
-
-        * configs: A dict of configuration setting used by an Extension.
-        """
-        self.config = configs
-
-    def getConfig(self, key):
-        """ Return a setting for the given key or an empty string. """
-        if key in self.config:
-            return self.config[key][0]
-        else:
-            return ""
-
-    def getConfigInfo(self):
-        """ Return all config settings as a list of tuples. """
-        return [(key, self.config[key][1]) for key in self.config.keys()]
-
-    def setConfig(self, key, value):
-        """ Set a config setting for `key` with the given `value`. """
-        self.config[key][0] = value
-
-    def extendMarkdown(self, md, md_globals):
-        """
-        Add the various proccesors and patterns to the Markdown Instance.
-
-        This method must be overriden by every extension.
-
-        Keyword arguments:
-
-        * md: The Markdown instance.
-
-        * md_globals: Global variables in the markdown module namespace.
-
-        """
-        raise NotImplementedError, 'Extension "%s.%s" must define an "extendMarkdown"' \
-            'method.' % (self.__class__.__module__, self.__class__.__name__)
-
-
-def load_extension(ext_name, configs = []):
-    """Load extension by name, then return the module.
-
-    The extension name may contain arguments as part of the string in the
-    following format: "extname(key1=value1,key2=value2)"
-
-    """
-
-    # Parse extensions config params (ignore the order)
-    configs = dict(configs)
-    pos = ext_name.find("(") # find the first "("
-    if pos > 0:
-        ext_args = ext_name[pos+1:-1]
-        ext_name = ext_name[:pos]
-        pairs = [x.split("=") for x in ext_args.split(",")]
-        configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
-
-    # Setup the module names
-    ext_module = 'markdown.extensions'
-    module_name_new_style = '.'.join([ext_module, ext_name])
-    module_name_old_style = '_'.join(['mdx', ext_name])
-
-    # Try loading the extention first from one place, then another
-    try: # New style (markdown.extensons.<extension>)
-        module = __import__(module_name_new_style, {}, {}, [ext_module])
-    except ImportError:
-        try: # Old style (mdx.<extension>)
-            module = __import__(module_name_old_style)
-        except ImportError:
-           message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
-               % (ext_name, module_name_new_style, module_name_old_style))
-           # Return None so we don't try to initiate none-existant extension
-           return None
-
-    # If the module is loaded successfully, we expect it to define a
-    # function called makeExtension()
-    try:
-        return module.makeExtension(configs.items())
-    except AttributeError:
-        message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
-
-
-def load_extensions(ext_names):
-    """Loads multiple extensions"""
-    extensions = []
-    for ext_name in ext_names:
-        extension = load_extension(ext_name)
-        if extension:
-            extensions.append(extension)
-    return extensions
-
-
-"""
-EXPORTED FUNCTIONS
-=============================================================================
-
-Those are the two functions we really mean to export: markdown() and
-markdownFromFile().
-"""
-
-def markdown(text,
-             extensions = [],
-             safe_mode = False,
-             output_format = DEFAULT_OUTPUT_FORMAT):
-    """Convert a markdown string to HTML and return HTML as a unicode string.
-
-    This is a shortcut function for `Markdown` class to cover the most
-    basic use case.  It initializes an instance of Markdown, loads the
-    necessary extensions and runs the parser on the given text.
-
-    Keyword arguments:
-
-    * text: Markdown formatted text as Unicode or ASCII string.
-    * extensions: A list of extensions or extension names (may contain config args).
-    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
-    * output_format: Format of output. Supported formats are:
-        * "xhtml1": Outputs XHTML 1.x. Default.
-        * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
-        * "html4": Outputs HTML 4
-        * "html": Outputs latest supported version of HTML (currently HTML 4).
-        Note that it is suggested that the more specific formats ("xhtml1" 
-        and "html4") be used as "xhtml" or "html" may change in the future
-        if it makes sense at that time. 
-
-    Returns: An HTML document as a string.
-
-    """
-    md = Markdown(extensions=load_extensions(extensions),
-                  safe_mode=safe_mode, 
-                  output_format=output_format)
-    return md.convert(text)
-
-
-def markdownFromFile(input = None,
-                     output = None,
-                     extensions = [],
-                     encoding = None,
-                     safe_mode = False,
-                     output_format = DEFAULT_OUTPUT_FORMAT):
-    """Read markdown code from a file and write it to a file or a stream."""
-    md = Markdown(extensions=load_extensions(extensions), 
-                  safe_mode=safe_mode,
-                  output_format=output_format)
-    md.convertFile(input, output, encoding)
-
+from .core import Markdown, markdown, markdownFromFile
+from .__meta__ import __version__, __version_info__  # noqa
 
+# For backward compatibility as some extensions expect it...
+from .extensions import Extension  # noqa
 
+__all__ = ['Markdown', 'markdown', 'markdownFromFile']