aboutsummaryrefslogtreecommitdiff
path: root/markdown/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/__init__.py')
-rw-r--r--markdown/__init__.py616
1 files changed, 15 insertions, 601 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index bd52113..d88b1e9 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -1,614 +1,28 @@
"""
Python Markdown
-===============
-Python Markdown converts Markdown to HTML and can be used as a library or
-called from the command line.
+A Python implementation of John Gruber's Markdown.
-## Basic usage as a module:
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
- import markdown
- md = Markdown()
- html = md.convert(your_text_string)
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-## Basic use from the command line:
-
- markdown source.txt > destination.html
-
-Run "markdown --help" to see more options.
-
-## Extensions
-
-See <http://www.freewisdom.org/projects/python-markdown/> for more
-information and instructions on how to extend the functionality of
-Python Markdown. Read that before you try modifying this file.
-
-## Authors and License
-
-Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
-maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
-Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
-
-Contact: markdown@freewisdom.org
-
-Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
-Copyright 200? Django Software Foundation (OrderedDict implementation)
+Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see docs/LICENSE for details).
-"""
-
-version = "2.0.3"
-version_info = (2,0,3, "Final")
-
-import re
-import codecs
-import sys
-import warnings
-import logging
-from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
-
-
-"""
-CONSTANTS
-=============================================================================
-"""
-
-"""
-Constants you might want to modify
------------------------------------------------------------------------------
-"""
-
-# default logging level for command-line use
-COMMAND_LINE_LOGGING_LEVEL = CRITICAL
-TAB_LENGTH = 4 # expand tabs to this many spaces
-ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
-SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
-DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output
-HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
-BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
- "|script|noscript|form|fieldset|iframe|math"
- "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
- "|tr|th|td")
-DOC_TAG = "div" # Element used to wrap document - later removed
-
-# Placeholders
-STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder
-ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder
-INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
-INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
-AMP_SUBSTITUTE = STX+"amp"+ETX
-
-
-"""
-Constants you probably do not need to change
------------------------------------------------------------------------------
-"""
-
-RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
- # Hebrew (0590-05FF), Arabic (0600-06FF),
- # Syriac (0700-074F), Arabic supplement (0750-077F),
- # Thaana (0780-07BF), Nko (07C0-07FF).
- (u'\u2D30', u'\u2D7F'), # Tifinagh
- )
-
-
-"""
-AUXILIARY GLOBAL FUNCTIONS
-=============================================================================
-"""
-
-
-def message(level, text):
- """ A wrapper method for logging debug messages. """
- logger = logging.getLogger('MARKDOWN')
- if logger.handlers:
- # The logger is configured
- logger.log(level, text)
- if level > WARN:
- sys.exit(0)
- elif level > WARN:
- raise MarkdownException, text
- else:
- warnings.warn(text, MarkdownWarning)
-
-
-def isBlockLevel(tag):
- """Check if the tag is a block level HTML tag."""
- return BLOCK_LEVEL_ELEMENTS.match(tag)
-
-"""
-MISC AUXILIARY CLASSES
-=============================================================================
+License: BSD (see LICENSE.md for details).
"""
-class AtomicString(unicode):
- """A string which should not be further processed."""
- pass
-
-
-class MarkdownException(Exception):
- """ A Markdown Exception. """
- pass
-
-
-class MarkdownWarning(Warning):
- """ A Markdown Warning. """
- pass
-
-
-"""
-OVERALL DESIGN
-=============================================================================
-
-Markdown processing takes place in four steps:
-
-1. A bunch of "preprocessors" munge the input text.
-2. BlockParser() parses the high-level structural elements of the
- pre-processed text into an ElementTree.
-3. A bunch of "treeprocessors" are run against the ElementTree. One such
- treeprocessor runs InlinePatterns against the ElementTree, detecting inline
- markup.
-4. Some post-processors are run against the text after the ElementTree has
- been serialized into text.
-5. The output is written to a string.
-
-Those steps are put together by the Markdown() class.
-
-"""
-
-import preprocessors
-import blockprocessors
-import treeprocessors
-import inlinepatterns
-import postprocessors
-import blockparser
-import etree_loader
-import odict
-
-# Extensions should use "markdown.etree" instead of "etree" (or do `from
-# markdown import etree`). Do not import it by yourself.
-
-etree = etree_loader.importETree()
-
-# Adds the ability to output html4
-import html4
-
-
-class Markdown:
- """Convert Markdown to HTML."""
-
- def __init__(self,
- extensions=[],
- extension_configs={},
- safe_mode = False,
- output_format=DEFAULT_OUTPUT_FORMAT):
- """
- Creates a new Markdown instance.
-
- Keyword arguments:
-
- * extensions: A list of extensions.
- If they are of type string, the module mdx_name.py will be loaded.
- If they are a subclass of markdown.Extension, they will be used
- as-is.
- * extension-configs: Configuration setting for extensions.
- * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
- * output_format: Format of output. Supported formats are:
- * "xhtml1": Outputs XHTML 1.x. Default.
- * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
- * "html4": Outputs HTML 4
- * "html": Outputs latest supported version of HTML (currently HTML 4).
- Note that it is suggested that the more specific formats ("xhtml1"
- and "html4") be used as "xhtml" or "html" may change in the future
- if it makes sense at that time.
-
- """
-
- self.safeMode = safe_mode
- self.registeredExtensions = []
- self.docType = ""
- self.stripTopLevelTags = True
-
- # Preprocessors
- self.preprocessors = odict.OrderedDict()
- self.preprocessors["html_block"] = \
- preprocessors.HtmlBlockPreprocessor(self)
- self.preprocessors["reference"] = \
- preprocessors.ReferencePreprocessor(self)
- # footnote preprocessor will be inserted with "<reference"
-
- # Block processors - ran by the parser
- self.parser = blockparser.BlockParser()
- self.parser.blockprocessors['empty'] = \
- blockprocessors.EmptyBlockProcessor(self.parser)
- self.parser.blockprocessors['indent'] = \
- blockprocessors.ListIndentProcessor(self.parser)
- self.parser.blockprocessors['code'] = \
- blockprocessors.CodeBlockProcessor(self.parser)
- self.parser.blockprocessors['hashheader'] = \
- blockprocessors.HashHeaderProcessor(self.parser)
- self.parser.blockprocessors['setextheader'] = \
- blockprocessors.SetextHeaderProcessor(self.parser)
- self.parser.blockprocessors['hr'] = \
- blockprocessors.HRProcessor(self.parser)
- self.parser.blockprocessors['olist'] = \
- blockprocessors.OListProcessor(self.parser)
- self.parser.blockprocessors['ulist'] = \
- blockprocessors.UListProcessor(self.parser)
- self.parser.blockprocessors['quote'] = \
- blockprocessors.BlockQuoteProcessor(self.parser)
- self.parser.blockprocessors['paragraph'] = \
- blockprocessors.ParagraphProcessor(self.parser)
-
-
- #self.prePatterns = []
-
- # Inline patterns - Run on the tree
- self.inlinePatterns = odict.OrderedDict()
- self.inlinePatterns["backtick"] = \
- inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
- self.inlinePatterns["escape"] = \
- inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
- self.inlinePatterns["reference"] = \
- inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
- self.inlinePatterns["link"] = \
- inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
- self.inlinePatterns["image_link"] = \
- inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
- self.inlinePatterns["image_reference"] = \
- inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
- self.inlinePatterns["autolink"] = \
- inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
- self.inlinePatterns["automail"] = \
- inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
- self.inlinePatterns["linebreak2"] = \
- inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
- self.inlinePatterns["linebreak"] = \
- inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
- self.inlinePatterns["html"] = \
- inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
- self.inlinePatterns["entity"] = \
- inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
- self.inlinePatterns["not_strong"] = \
- inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
- self.inlinePatterns["strong_em"] = \
- inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
- self.inlinePatterns["strong"] = \
- inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
- self.inlinePatterns["emphasis"] = \
- inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
- self.inlinePatterns["emphasis2"] = \
- inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
- # The order of the handlers matters!!!
-
-
- # Tree processors - run once we have a basic parse.
- self.treeprocessors = odict.OrderedDict()
- self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
- self.treeprocessors["prettify"] = \
- treeprocessors.PrettifyTreeprocessor(self)
-
- # Postprocessors - finishing touches.
- self.postprocessors = odict.OrderedDict()
- self.postprocessors["raw_html"] = \
- postprocessors.RawHtmlPostprocessor(self)
- self.postprocessors["amp_substitute"] = \
- postprocessors.AndSubstitutePostprocessor()
- # footnote postprocessor will be inserted with ">amp_substitute"
-
- # Map format keys to serializers
- self.output_formats = {
- 'html' : html4.to_html_string,
- 'html4' : html4.to_html_string,
- 'xhtml' : etree.tostring,
- 'xhtml1': etree.tostring,
- }
-
- self.references = {}
- self.htmlStash = preprocessors.HtmlStash()
- self.registerExtensions(extensions = extensions,
- configs = extension_configs)
- self.set_output_format(output_format)
- self.reset()
-
- def registerExtensions(self, extensions, configs):
- """
- Register extensions with this instance of Markdown.
-
- Keyword aurguments:
-
- * extensions: A list of extensions, which can either
- be strings or objects. See the docstring on Markdown.
- * configs: A dictionary mapping module names to config options.
-
- """
- for ext in extensions:
- if isinstance(ext, basestring):
- ext = load_extension(ext, configs.get(ext, []))
- if isinstance(ext, Extension):
- try:
- ext.extendMarkdown(self, globals())
- except NotImplementedError, e:
- message(ERROR, e)
- else:
- message(ERROR, 'Extension "%s.%s" must be of type: "markdown.Extension".' \
- % (ext.__class__.__module__, ext.__class__.__name__))
-
- def registerExtension(self, extension):
- """ This gets called by the extension """
- self.registeredExtensions.append(extension)
-
- def reset(self):
- """
- Resets all state variables so that we can start with a new text.
- """
- self.htmlStash.reset()
- self.references.clear()
-
- for extension in self.registeredExtensions:
- extension.reset()
-
- def set_output_format(self, format):
- """ Set the output format for the class instance. """
- try:
- self.serializer = self.output_formats[format.lower()]
- except KeyError:
- message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
- % (format, self.output_formats.keys()))
-
- def convert(self, source):
- """
- Convert markdown to serialized XHTML or HTML.
-
- Keyword arguments:
-
- * source: Source text as a Unicode string.
-
- """
-
- # Fixup the source text
- if not source.strip():
- return u"" # a blank unicode string
- try:
- source = unicode(source)
- except UnicodeDecodeError:
- message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
- return u""
-
- source = source.replace(STX, "").replace(ETX, "")
- source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
- source = re.sub(r'\n\s+\n', '\n\n', source)
- source = source.expandtabs(TAB_LENGTH)
-
- # Split into lines and run the line preprocessors.
- self.lines = source.split("\n")
- for prep in self.preprocessors.values():
- self.lines = prep.run(self.lines)
-
- # Parse the high-level elements.
- root = self.parser.parseDocument(self.lines).getroot()
-
- # Run the tree-processors
- for treeprocessor in self.treeprocessors.values():
- newRoot = treeprocessor.run(root)
- if newRoot:
- root = newRoot
-
- # Serialize _properly_. Strip top-level tags.
- output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
- if self.stripTopLevelTags:
- try:
- start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
- end = output.rindex('</%s>'%DOC_TAG)
- output = output[start:end].strip()
- except ValueError:
- if output.strip().endswith('<%s />'%DOC_TAG):
- # We have an empty document
- output = ''
- else:
- # We have a serious problem
- message(CRITICAL, 'Failed to strip top level tags.')
-
- # Run the text post-processors
- for pp in self.postprocessors.values():
- output = pp.run(output)
-
- return output.strip()
-
- def convertFile(self, input=None, output=None, encoding=None):
- """Converts a markdown file and returns the HTML as a unicode string.
-
- Decodes the file using the provided encoding (defaults to utf-8),
- passes the file content to markdown, and outputs the html to either
- the provided stream or the file with provided name, using the same
- encoding as the source file.
-
- **Note:** This is the only place that decoding and encoding of unicode
- takes place in Python-Markdown. (All other code is unicode-in /
- unicode-out.)
-
- Keyword arguments:
-
- * input: Name of source text file.
- * output: Name of output file. Writes to stdout if `None`.
- * encoding: Encoding of input and output files. Defaults to utf-8.
-
- """
-
- encoding = encoding or "utf-8"
-
- # Read the source
- input_file = codecs.open(input, mode="r", encoding=encoding)
- text = input_file.read()
- input_file.close()
- text = text.lstrip(u'\ufeff') # remove the byte-order mark
-
- # Convert
- html = self.convert(text)
-
- # Write to file or stdout
- if isinstance(output, (str, unicode)):
- output_file = codecs.open(output, "w", encoding=encoding)
- output_file.write(html)
- output_file.close()
- else:
- output.write(html.encode(encoding))
-
-
-"""
-Extensions
------------------------------------------------------------------------------
-"""
-
-class Extension:
- """ Base class for extensions to subclass. """
- def __init__(self, configs = {}):
- """Create an instance of an Extention.
-
- Keyword arguments:
-
- * configs: A dict of configuration setting used by an Extension.
- """
- self.config = configs
-
- def getConfig(self, key):
- """ Return a setting for the given key or an empty string. """
- if key in self.config:
- return self.config[key][0]
- else:
- return ""
-
- def getConfigInfo(self):
- """ Return all config settings as a list of tuples. """
- return [(key, self.config[key][1]) for key in self.config.keys()]
-
- def setConfig(self, key, value):
- """ Set a config setting for `key` with the given `value`. """
- self.config[key][0] = value
-
- def extendMarkdown(self, md, md_globals):
- """
- Add the various proccesors and patterns to the Markdown Instance.
-
- This method must be overriden by every extension.
-
- Keyword arguments:
-
- * md: The Markdown instance.
-
- * md_globals: Global variables in the markdown module namespace.
-
- """
- raise NotImplementedError, 'Extension "%s.%s" must define an "extendMarkdown"' \
- 'method.' % (self.__class__.__module__, self.__class__.__name__)
-
-
-def load_extension(ext_name, configs = []):
- """Load extension by name, then return the module.
-
- The extension name may contain arguments as part of the string in the
- following format: "extname(key1=value1,key2=value2)"
-
- """
-
- # Parse extensions config params (ignore the order)
- configs = dict(configs)
- pos = ext_name.find("(") # find the first "("
- if pos > 0:
- ext_args = ext_name[pos+1:-1]
- ext_name = ext_name[:pos]
- pairs = [x.split("=") for x in ext_args.split(",")]
- configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
-
- # Setup the module names
- ext_module = 'markdown.extensions'
- module_name_new_style = '.'.join([ext_module, ext_name])
- module_name_old_style = '_'.join(['mdx', ext_name])
-
- # Try loading the extention first from one place, then another
- try: # New style (markdown.extensons.<extension>)
- module = __import__(module_name_new_style, {}, {}, [ext_module])
- except ImportError:
- try: # Old style (mdx.<extension>)
- module = __import__(module_name_old_style)
- except ImportError:
- message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
- % (ext_name, module_name_new_style, module_name_old_style))
- # Return None so we don't try to initiate none-existant extension
- return None
-
- # If the module is loaded successfully, we expect it to define a
- # function called makeExtension()
- try:
- return module.makeExtension(configs.items())
- except AttributeError:
- message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
-
-
-def load_extensions(ext_names):
- """Loads multiple extensions"""
- extensions = []
- for ext_name in ext_names:
- extension = load_extension(ext_name)
- if extension:
- extensions.append(extension)
- return extensions
-
-
-"""
-EXPORTED FUNCTIONS
-=============================================================================
-
-Those are the two functions we really mean to export: markdown() and
-markdownFromFile().
-"""
-
-def markdown(text,
- extensions = [],
- safe_mode = False,
- output_format = DEFAULT_OUTPUT_FORMAT):
- """Convert a markdown string to HTML and return HTML as a unicode string.
-
- This is a shortcut function for `Markdown` class to cover the most
- basic use case. It initializes an instance of Markdown, loads the
- necessary extensions and runs the parser on the given text.
-
- Keyword arguments:
-
- * text: Markdown formatted text as Unicode or ASCII string.
- * extensions: A list of extensions or extension names (may contain config args).
- * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
- * output_format: Format of output. Supported formats are:
- * "xhtml1": Outputs XHTML 1.x. Default.
- * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
- * "html4": Outputs HTML 4
- * "html": Outputs latest supported version of HTML (currently HTML 4).
- Note that it is suggested that the more specific formats ("xhtml1"
- and "html4") be used as "xhtml" or "html" may change in the future
- if it makes sense at that time.
-
- Returns: An HTML document as a string.
-
- """
- md = Markdown(extensions=load_extensions(extensions),
- safe_mode=safe_mode,
- output_format=output_format)
- return md.convert(text)
-
-
-def markdownFromFile(input = None,
- output = None,
- extensions = [],
- encoding = None,
- safe_mode = False,
- output_format = DEFAULT_OUTPUT_FORMAT):
- """Read markdown code from a file and write it to a file or a stream."""
- md = Markdown(extensions=load_extensions(extensions),
- safe_mode=safe_mode,
- output_format=output_format)
- md.convertFile(input, output, encoding)
-
+from .core import Markdown, markdown, markdownFromFile
+from .__meta__ import __version__, __version_info__ # noqa
+# For backward compatibility as some extensions expect it...
+from .extensions import Extension # noqa
+__all__ = ['Markdown', 'markdown', 'markdownFromFile']