aboutsummaryrefslogtreecommitdiff
path: root/markdown/blockprocessors.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/blockprocessors.py')
-rw-r--r--markdown/blockprocessors.py423
1 files changed, 293 insertions, 130 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 7d3b137..3d0ff86 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -1,23 +1,64 @@
"""
+Python Markdown
+
+A Python implementation of John Gruber's Markdown.
+
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
+
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE.md for details).
+
CORE MARKDOWN BLOCKPARSER
-=============================================================================
+===========================================================================
-This parser handles basic parsing of Markdown blocks. It doesn't concern itself
-with inline elements such as **bold** or *italics*, but rather just catches
-blocks, lists, quotes, etc.
+This parser handles basic parsing of Markdown blocks. It doesn't concern
+itself with inline elements such as **bold** or *italics*, but rather just
+catches blocks, lists, quotes, etc.
-The BlockParser is made up of a bunch of BlockProssors, each handling a
+The BlockParser is made up of a bunch of BlockProcessors, each handling a
different type of block. Extensions may add/replace/remove BlockProcessors
as they need to alter how markdown blocks are parsed.
-
"""
+import logging
import re
-import markdown
+import xml.etree.ElementTree as etree
+from . import util
+from .blockparser import BlockParser
+
+logger = logging.getLogger('MARKDOWN')
+
+
+def build_block_parser(md, **kwargs):
+ """ Build the default block parser used by Markdown. """
+ parser = BlockParser(md)
+ parser.blockprocessors.register(EmptyBlockProcessor(parser), 'empty', 100)
+ parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 90)
+ parser.blockprocessors.register(CodeBlockProcessor(parser), 'code', 80)
+ parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 70)
+ parser.blockprocessors.register(SetextHeaderProcessor(parser), 'setextheader', 60)
+ parser.blockprocessors.register(HRProcessor(parser), 'hr', 50)
+ parser.blockprocessors.register(OListProcessor(parser), 'olist', 40)
+ parser.blockprocessors.register(UListProcessor(parser), 'ulist', 30)
+ parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 20)
+ parser.blockprocessors.register(ReferenceProcessor(parser), 'reference', 15)
+ parser.blockprocessors.register(ParagraphProcessor(parser), 'paragraph', 10)
+ return parser
+
class BlockProcessor:
- """ Base class for block processors.
-
+ """ Base class for block processors.
+
Each subclass will provide the methods below to work with the source and
tree. Each processor will need to define it's own ``test`` and ``run``
methods. The ``test`` method should return True or False, to indicate
@@ -26,8 +67,9 @@ class BlockProcessor:
"""
- def __init__(self, parser=None):
+ def __init__(self, parser):
self.parser = parser
+ self.tab_length = parser.md.tab_length
def lastChild(self, parent):
""" Return the last child of an etree element. """
@@ -36,13 +78,15 @@ class BlockProcessor:
else:
return None
- def detab(self, text):
+ def detab(self, text, length=None):
""" Remove a tab from the front of each line of the given text. """
+ if length is None:
+ length = self.tab_length
newtext = []
lines = text.split('\n')
for line in lines:
- if line.startswith(' '*markdown.TAB_LENGTH):
- newtext.append(line[markdown.TAB_LENGTH:])
+ if line.startswith(' ' * length):
+ newtext.append(line[length:])
elif not line.strip():
newtext.append('')
else:
@@ -53,37 +97,37 @@ class BlockProcessor:
""" Remove a tab from front of lines but allowing dedented lines. """
lines = text.split('\n')
for i in range(len(lines)):
- if lines[i].startswith(' '*markdown.TAB_LENGTH*level):
- lines[i] = lines[i][markdown.TAB_LENGTH*level:]
+ if lines[i].startswith(' '*self.tab_length*level):
+ lines[i] = lines[i][self.tab_length*level:]
return '\n'.join(lines)
def test(self, parent, block):
- """ Test for block type. Must be overridden by subclasses.
-
- As the parser loops through processors, it will call the ``test`` method
- on each to determine if the given block of text is of that type. This
- method must return a boolean ``True`` or ``False``. The actual method of
- testing is left to the needs of that particular block type. It could
- be as simple as ``block.startswith(some_string)`` or a complex regular
- expression. As the block type may be different depending on the parent
- of the block (i.e. inside a list), the parent etree element is also
- provided and may be used as part of the test.
+ """ Test for block type. Must be overridden by subclasses.
+
+ As the parser loops through processors, it will call the ``test``
+ method on each to determine if the given block of text is of that
+ type. This method must return a boolean ``True`` or ``False``. The
+ actual method of testing is left to the needs of that particular
+ block type. It could be as simple as ``block.startswith(some_string)``
+ or a complex regular expression. As the block type may be different
+ depending on the parent of the block (i.e. inside a list), the parent
+ etree element is also provided and may be used as part of the test.
Keywords:
-
+
* ``parent``: A etree element which will be the parent of the block.
- * ``block``: A block of text from the source which has been split at
+ * ``block``: A block of text from the source which has been split at
blank lines.
"""
- pass
+ pass # pragma: no cover
def run(self, parent, blocks):
- """ Run processor. Must be overridden by subclasses.
-
+ """ Run processor. Must be overridden by subclasses.
+
When the parser determines the appropriate type of a block, the parser
will call the corresponding processor's ``run`` method. This method
should parse the individual lines of the block and append them to
- the etree.
+ the etree.
Note that both the ``parent`` and ``etree`` keywords are pointers
to instances of the objects which should be edited in place. Each
@@ -99,12 +143,12 @@ class BlockProcessor:
* ``parent``: A etree element which is the parent of the current block.
* ``blocks``: A list of all remaining blocks of the document.
"""
- pass
+ pass # pragma: no cover
class ListIndentProcessor(BlockProcessor):
- """ Process children of list items.
-
+ """ Process children of list items.
+
Example:
* a list item
process this part
@@ -113,18 +157,19 @@ class ListIndentProcessor(BlockProcessor):
"""
- INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
ITEM_TYPES = ['li']
LIST_TYPES = ['ul', 'ol']
+ def __init__(self, *args):
+ super().__init__(*args)
+ self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
+
def test(self, parent, block):
- return block.startswith(' '*markdown.TAB_LENGTH) and \
- not self.parser.state.isstate('detabbed') and \
- (parent.tag in self.ITEM_TYPES or \
- (len(parent) and parent[-1] and \
- (parent[-1].tag in self.LIST_TYPES)
- )
- )
+ return block.startswith(' '*self.tab_length) and \
+ not self.parser.state.isstate('detabbed') and \
+ (parent.tag in self.ITEM_TYPES or
+ (len(parent) and parent[-1] is not None and
+ (parent[-1].tag in self.LIST_TYPES)))
def run(self, parent, blocks):
block = blocks.pop(0)
@@ -133,8 +178,16 @@ class ListIndentProcessor(BlockProcessor):
self.parser.state.set('detabbed')
if parent.tag in self.ITEM_TYPES:
- # The parent is already a li. Just parse the child block.
- self.parser.parseBlocks(parent, [block])
+ # It's possible that this parent has a 'ul' or 'ol' child list
+ # with a member. If that is the case, then that should be the
+ # parent. This is intended to catch the edge case of an indented
+ # list whose first member was parsed previous to this point
+ # see OListProcessor
+ if len(parent) and parent[-1].tag in self.LIST_TYPES:
+ self.parser.parseBlocks(parent[-1], [block])
+ else:
+ # The parent is already a li. Just parse the child block.
+ self.parser.parseBlocks(parent, [block])
elif sibling.tag in self.ITEM_TYPES:
# The sibling is a li. Use it as parent.
self.parser.parseBlocks(sibling, [block])
@@ -143,8 +196,12 @@ class ListIndentProcessor(BlockProcessor):
# Assume the last child li is the parent of this block.
if sibling[-1].text:
# If the parent li has text, that text needs to be moved to a p
- block = '%s\n\n%s' % (sibling[-1].text, block)
+ # The p must be 'inserted' at beginning of list in the event
+ # that other children already exist, i.e., a nested sublist.
+ p = etree.Element('p')
+ p.text = sibling[-1].text
sibling[-1].text = ''
+ sibling[-1].insert(0, p)
self.parser.parseChunk(sibling[-1], block)
else:
self.create_item(sibling, block)
@@ -152,15 +209,15 @@ class ListIndentProcessor(BlockProcessor):
def create_item(self, parent, block):
""" Create a new li and parse the block with it as the parent. """
- li = markdown.etree.SubElement(parent, 'li')
+ li = etree.SubElement(parent, 'li')
self.parser.parseBlocks(li, [block])
-
+
def get_level(self, parent, block):
""" Get level of indent based on list level. """
# Get indent level
m = self.INDENT_RE.match(block)
if m:
- indent_level = len(m.group(1))/markdown.TAB_LENGTH
+ indent_level = len(m.group(1))/self.tab_length
else:
indent_level = 0
if self.parser.state.isstate('list'):
@@ -172,7 +229,8 @@ class ListIndentProcessor(BlockProcessor):
# Step through children of tree to find matching indent level.
while indent_level > level:
child = self.lastChild(parent)
- if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):
+ if (child is not None and
+ (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
if child.tag in self.LIST_TYPES:
level += 1
parent = child
@@ -187,28 +245,30 @@ class CodeBlockProcessor(BlockProcessor):
""" Process code blocks. """
def test(self, parent, block):
- return block.startswith(' '*markdown.TAB_LENGTH)
-
+ return block.startswith(' '*self.tab_length)
+
def run(self, parent, blocks):
sibling = self.lastChild(parent)
block = blocks.pop(0)
theRest = ''
- if sibling and sibling.tag == "pre" and len(sibling) \
- and sibling[0].tag == "code":
+ if (sibling is not None and sibling.tag == "pre" and
+ len(sibling) and sibling[0].tag == "code"):
# The previous block was a code block. As blank lines do not start
# new code blocks, append this block to the previous, adding back
# linebreaks removed from the split into a list.
code = sibling[0]
block, theRest = self.detab(block)
- code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
+ code.text = util.AtomicString(
+ '{}\n{}\n'.format(code.text, util.code_escape(block.rstrip()))
+ )
else:
# This is a new codeblock. Create the elements and insert text.
- pre = markdown.etree.SubElement(parent, 'pre')
- code = markdown.etree.SubElement(pre, 'code')
+ pre = etree.SubElement(parent, 'pre')
+ code = etree.SubElement(pre, 'code')
block, theRest = self.detab(block)
- code.text = markdown.AtomicString('%s\n' % block.rstrip())
+ code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip()))
if theRest:
- # This block contained unindented line(s) after the first indented
+ # This block contained unindented line(s) after the first indented
# line. Insert these lines as the first block of the master blocks
# list for future processing.
blocks.insert(0, theRest)
@@ -219,27 +279,31 @@ class BlockQuoteProcessor(BlockProcessor):
RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
def test(self, parent, block):
- return bool(self.RE.search(block))
+ return bool(self.RE.search(block)) and not util.nearing_recursion_limit()
def run(self, parent, blocks):
block = blocks.pop(0)
m = self.RE.search(block)
if m:
- before = block[:m.start()] # Lines before blockquote
- # Pass lines before blockquote in recursively for parsing forst.
+ before = block[:m.start()] # Lines before blockquote
+ # Pass lines before blockquote in recursively for parsing first.
self.parser.parseBlocks(parent, [before])
- # Remove ``> `` from begining of each line.
- block = '\n'.join([self.clean(line) for line in
- block[m.start():].split('\n')])
+ # Remove ``> `` from beginning of each line.
+ block = '\n'.join(
+ [self.clean(line) for line in block[m.start():].split('\n')]
+ )
sibling = self.lastChild(parent)
- if sibling and sibling.tag == "blockquote":
+ if sibling is not None and sibling.tag == "blockquote":
# Previous block was a blockquote so set that as this blocks parent
quote = sibling
else:
# This is a new blockquote. Create a new parent element.
- quote = markdown.etree.SubElement(parent, 'blockquote')
+ quote = etree.SubElement(parent, 'blockquote')
# Recursively parse block with blockquote as parent.
+ # change parser state so blockquotes embedded in lists use p tags
+ self.parser.state.set('blockquote')
self.parser.parseChunk(quote, block)
+ self.parser.state.reset()
def clean(self, line):
""" Remove ``>`` from beginning of a line. """
@@ -251,16 +315,31 @@ class BlockQuoteProcessor(BlockProcessor):
else:
return line
+
class OListProcessor(BlockProcessor):
""" Process ordered list blocks. """
TAG = 'ol'
- # Detect an item (``1. item``). ``group(1)`` contains contents of item.
- RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')
- # Detect items on secondary lines. they can be of either list type.
- CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')
- # Detect indented (nested) items of either type
- INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')
+ # The integer (python string) with which the lists starts (default=1)
+ # E.g., if the list is initialized as:
+ # 3. Item
+ # The ol tag will get the start="3" attribute
+ STARTSWITH = '1'
+ # Lazy ol - ignore startswith
+ LAZY_OL = True
+ # List of allowed sibling tags.
+ SIBLING_TAGS = ['ol', 'ul']
+
+ def __init__(self, parser):
+ super().__init__(parser)
+ # Detect an item (``1. item``). ``group(1)`` contains contents of item.
+ self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
+ # Detect items on secondary lines. they can be of either list type.
+ self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
+ (self.tab_length - 1))
+ # Detect indented (nested) items of either type
+ self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
+ (self.tab_length, self.tab_length * 2 - 1))
def test(self, parent, block):
return bool(self.RE.match(block))
@@ -269,33 +348,58 @@ class OListProcessor(BlockProcessor):
# Check fr multiple items in one block.
items = self.get_items(blocks.pop(0))
sibling = self.lastChild(parent)
- if sibling and sibling.tag in ['ol', 'ul']:
+
+ if sibling is not None and sibling.tag in self.SIBLING_TAGS:
# Previous block was a list item, so set that as parent
lst = sibling
- # make sure previous item is in a p.
- if len(lst) and lst[-1].text and not len(lst[-1]):
- p = markdown.etree.SubElement(lst[-1], 'p')
+ # make sure previous item is in a p- if the item has text,
+ # then it isn't in a p
+ if lst[-1].text:
+ # since it's possible there are other children for this
+ # sibling, we can't just SubElement the p, we need to
+ # insert it as the first item.
+ p = etree.Element('p')
p.text = lst[-1].text
lst[-1].text = ''
+ lst[-1].insert(0, p)
+ # if the last item has a tail, then the tail needs to be put in a p
+ # likely only when a header is not followed by a blank line
+ lch = self.lastChild(lst[-1])
+ if lch is not None and lch.tail:
+ p = etree.SubElement(lst[-1], 'p')
+ p.text = lch.tail.lstrip()
+ lch.tail = ''
+
# parse first block differently as it gets wrapped in a p.
- li = markdown.etree.SubElement(lst, 'li')
+ li = etree.SubElement(lst, 'li')
self.parser.state.set('looselist')
firstitem = items.pop(0)
self.parser.parseBlocks(li, [firstitem])
self.parser.state.reset()
+ elif parent.tag in ['ol', 'ul']:
+ # this catches the edge case of a multi-item indented list whose
+ # first item is in a blank parent-list item:
+ # * * subitem1
+ # * subitem2
+ # see also ListIndentProcessor
+ lst = parent
else:
# This is a new list so create parent with appropriate tag.
- lst = markdown.etree.SubElement(parent, self.TAG)
+ lst = etree.SubElement(parent, self.TAG)
+ # Check if a custom start integer is set
+ if not self.LAZY_OL and self.STARTSWITH != '1':
+ lst.attrib['start'] = self.STARTSWITH
+
self.parser.state.set('list')
# Loop through items in block, recursively parsing each with the
# appropriate parent.
for item in items:
- if item.startswith(' '*markdown.TAB_LENGTH):
+ if item.startswith(' '*self.tab_length):
# Item is indented. Parse with last item as parent
self.parser.parseBlocks(lst[-1], [item])
else:
# New item. Create li and parse with it as parent
- li = markdown.etree.SubElement(lst, 'li')
+ li = etree.SubElement(lst, 'li')
self.parser.parseBlocks(li, [item])
self.parser.state.reset()
@@ -305,18 +409,24 @@ class OListProcessor(BlockProcessor):
for line in block.split('\n'):
m = self.CHILD_RE.match(line)
if m:
- # This is a new item. Append
+ # This is a new list item
+ # Check first item for the start index
+ if not items and self.TAG == 'ol':
+ # Detect the integer value of first list item
+ INTEGER_RE = re.compile(r'(\d+)')
+ self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
+ # Append to the list
items.append(m.group(3))
elif self.INDENT_RE.match(line):
# This is an indented (possibly nested) item.
- if items[-1].startswith(' '*markdown.TAB_LENGTH):
+ if items[-1].startswith(' '*self.tab_length):
# Previous item was indented. Append to that item.
- items[-1] = '%s\n%s' % (items[-1], line)
+ items[-1] = '{}\n{}'.format(items[-1], line)
else:
items.append(line)
else:
# This is another line of previous item. Append to that item.
- items[-1] = '%s\n%s' % (items[-1], line)
+ items[-1] = '{}\n{}'.format(items[-1], line)
return items
@@ -324,14 +434,18 @@ class UListProcessor(OListProcessor):
""" Process unordered list blocks. """
TAG = 'ul'
- RE = re.compile(r'^[ ]{0,3}[*+-][ ]+(.*)')
+
+ def __init__(self, parser):
+ super().__init__(parser)
+ # Detect an item (``* item``). ``group(1)`` contains contents of item.
+ self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
class HashHeaderProcessor(BlockProcessor):
""" Process Hash Headers. """
# Detect a header at start of any line in block
- RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')
+ RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
def test(self, parent, block):
return bool(self.RE.search(block))
@@ -340,29 +454,29 @@ class HashHeaderProcessor(BlockProcessor):
block = blocks.pop(0)
m = self.RE.search(block)
if m:
- before = block[:m.start()] # All lines before header
- after = block[m.end():] # All lines after header
+ before = block[:m.start()] # All lines before header
+ after = block[m.end():] # All lines after header
if before:
# As the header was not the first line of the block and the
# lines before the header must be parsed first,
# recursively parse this lines as a block.
self.parser.parseBlocks(parent, [before])
# Create header using named groups from RE
- h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level')))
+ h = etree.SubElement(parent, 'h%d' % len(m.group('level')))
h.text = m.group('header').strip()
if after:
# Insert remaining lines as first block for future parsing.
blocks.insert(0, after)
- else:
+ else: # pragma: no cover
# This should never happen, but just in case...
- message(CRITICAL, "We've got a problem header!")
+ logger.warn("We've got a problem header: %r" % block)
class SetextHeaderProcessor(BlockProcessor):
""" Process Setext-style Headers. """
# Detect Setext-style header. Must be first 2 lines of block.
- RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE)
+ RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)
def test(self, parent, block):
return bool(self.RE.match(block))
@@ -374,7 +488,7 @@ class SetextHeaderProcessor(BlockProcessor):
level = 1
else:
level = 2
- h = markdown.etree.SubElement(parent, 'h%d' % level)
+ h = etree.SubElement(parent, 'h%d' % level)
h.text = lines[0].strip()
if len(lines) > 2:
# Block contains additional lines. Add to master blocks for later.
@@ -384,58 +498,91 @@ class SetextHeaderProcessor(BlockProcessor):
class HRProcessor(BlockProcessor):
""" Process Horizontal Rules. """
- RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
+ # Python's re module doesn't officially support atomic grouping. However you can fake it.
+ # See https://stackoverflow.com/a/13577411/866026
+ RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
# Detect hr on any line of a block.
- SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
- # Match a hr on a single line of text.
- MATCH_RE = re.compile(r'^%s$' % RE)
+ SEARCH_RE = re.compile(RE, re.MULTILINE)
def test(self, parent, block):
- return bool(self.SEARCH_RE.search(block))
+ m = self.SEARCH_RE.search(block)
+ if m:
+ # Save match object on class instance so we can use it later.
+ self.match = m
+ return True
+ return False
def run(self, parent, blocks):
- lines = blocks.pop(0).split('\n')
- prelines = []
+ block = blocks.pop(0)
+ match = self.match
# Check for lines in block before hr.
- for line in lines:
- m = self.MATCH_RE.match(line)
- if m:
- break
- else:
- prelines.append(line)
- if len(prelines):
+ prelines = block[:match.start()].rstrip('\n')
+ if prelines:
# Recursively parse lines before hr so they get parsed first.
- self.parser.parseBlocks(parent, ['\n'.join(prelines)])
+ self.parser.parseBlocks(parent, [prelines])
# create hr
- hr = markdown.etree.SubElement(parent, 'hr')
+ etree.SubElement(parent, 'hr')
# check for lines in block after hr.
- lines = lines[len(prelines)+1:]
- if len(lines):
+ postlines = block[match.end():].lstrip('\n')
+ if postlines:
# Add lines after hr to master blocks for later parsing.
- blocks.insert(0, '\n'.join(lines))
+ blocks.insert(0, postlines)
class EmptyBlockProcessor(BlockProcessor):
- """ Process blocks and start with an empty line. """
+ """ Process blocks that are empty or start with an empty line. """
- # Detect a block that only contains whitespace
- # or only whitespace on the first line.
- RE = re.compile(r'^\s*\n')
+ def test(self, parent, block):
+ return not block or block.startswith('\n')
+
+ def run(self, parent, blocks):
+ block = blocks.pop(0)
+ filler = '\n\n'
+ if block:
+ # Starts with empty line
+ # Only replace a single line.
+ filler = '\n'
+ # Save the rest for later.
+ theRest = block[1:]
+ if theRest:
+ # Add remaining lines to master blocks for later.
+ blocks.insert(0, theRest)
+ sibling = self.lastChild(parent)
+ if (sibling is not None and sibling.tag == 'pre' and
+ len(sibling) and sibling[0].tag == 'code'):
+ # Last block is a codeblock. Append to preserve whitespace.
+ sibling[0].text = util.AtomicString(
+ '{}{}'.format(sibling[0].text, filler)
+ )
+
+
+class ReferenceProcessor(BlockProcessor):
+ """ Process link references. """
+ RE = re.compile(
+ r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
+ )
def test(self, parent, block):
- return bool(self.RE.match(block))
+ return True
def run(self, parent, blocks):
block = blocks.pop(0)
- m = self.RE.match(block)
+ m = self.RE.search(block)
if m:
- # Add remaining line to master blocks for later.
- blocks.insert(0, block[m.end():])
- sibling = self.lastChild(parent)
- if sibling and sibling.tag == 'pre' and sibling[0] and \
- sibling[0].tag == 'code':
- # Last block is a codeblock. Append to preserve whitespace.
- sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text )
+ id = m.group(1).strip().lower()
+ link = m.group(2).lstrip('<').rstrip('>')
+ title = m.group(5) or m.group(6)
+ self.parser.md.references[id] = (link, title)
+ if block[m.end():].strip():
+ # Add any content after match back to blocks as separate block
+ blocks.insert(0, block[m.end():].lstrip('\n'))
+ if block[:m.start()].strip():
+ # Add any content before match back to blocks as separate block
+ blocks.insert(0, block[:m.start()].rstrip('\n'))
+ return True
+ # No match. Restore block.
+ blocks.insert(0, block)
+ return False
class ParagraphProcessor(BlockProcessor):
@@ -449,12 +596,28 @@ class ParagraphProcessor(BlockProcessor):
if block.strip():
# Not a blank block. Add to parent, otherwise throw it away.
if self.parser.state.isstate('list'):
- # The parent is a tight-list. Append to parent.text
- if parent.text:
- parent.text = '%s\n%s' % (parent.text, block)
+ # The parent is a tight-list.
+ #
+ # Check for any children. This will likely only happen in a
+ # tight-list when a header isn't followed by a blank line.
+ # For example:
+ #
+ # * # Header
+ # Line 2 of list item - not part of header.
+ sibling = self.lastChild(parent)
+ if sibling is not None:
+ # Insert after sibling.
+ if sibling.tail:
+ sibling.tail = '{}\n{}'.format(sibling.tail, block)
+ else:
+ sibling.tail = '\n%s' % block
else:
- parent.text = block.lstrip()
+ # Append to parent.text
+ if parent.text:
+ parent.text = '{}\n{}'.format(parent.text, block)
+ else:
+ parent.text = block.lstrip()
else:
# Create a regular paragraph
- p = markdown.etree.SubElement(parent, 'p')
+ p = etree.SubElement(parent, 'p')
p.text = block.lstrip()