aboutsummaryrefslogtreecommitdiff
path: root/markdown/extensions/footnotes.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/extensions/footnotes.py')
-rw-r--r--markdown/extensions/footnotes.py474
1 files changed, 289 insertions, 185 deletions
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
index e1a9cda..96ed5c2 100644
--- a/markdown/extensions/footnotes.py
+++ b/markdown/extensions/footnotes.py
@@ -1,81 +1,126 @@
"""
-========================= FOOTNOTES =================================
+Footnotes Extension for Python-Markdown
+=======================================
-This section adds footnote handling to markdown. It can be used as
-an example for extending python-markdown with relatively complex
-functionality. While in this case the extension is included inside
-the module itself, it could just as easily be added from outside the
-module. Not that all markdown classes above are ignorant about
-footnotes. All footnote functionality is provided separately and
-then added to the markdown instance at the run time.
+Adds footnote handling to Python-Markdown.
-Footnote functionality is attached by calling extendMarkdown()
-method of FootnoteExtension. The method also registers the
-extension to allow it's state to be reset by a call to reset()
-method.
+See <https://Python-Markdown.github.io/extensions/footnotes>
+for documentation.
-Example:
- Footnotes[^1] have a label[^label] and a definition[^!DEF].
+Copyright The Python Markdown Project
- [^1]: This is a footnote
- [^label]: A footnote on "label"
- [^!DEF]: The footnote for definition
+License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
-import re, markdown
-from markdown import etree
+from . import Extension
+from ..blockprocessors import BlockProcessor
+from ..inlinepatterns import InlineProcessor
+from ..treeprocessors import Treeprocessor
+from ..postprocessors import Postprocessor
+from .. import util
+from collections import OrderedDict
+import re
+import copy
+import xml.etree.ElementTree as etree
-FN_BACKLINK_TEXT = "zz1337820767766393qq"
-NBSP_PLACEHOLDER = "qq3936677670287331zz"
-DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
-TABBED_RE = re.compile(r'((\t)|( ))(.*)')
+FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
+NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
+RE_REF_ID = re.compile(r'(fnref)(\d+)')
-class FootnoteExtension(markdown.Extension):
+
+class FootnoteExtension(Extension):
""" Footnote Extension. """
- def __init__ (self, configs):
+ def __init__(self, **kwargs):
""" Setup configs. """
- self.config = {'PLACE_MARKER':
- ["///Footnotes Go Here///",
- "The text string that marks where the footnotes go"],
- 'UNIQUE_IDS':
- [False,
- "Avoid name collisions across "
- "multiple calls to reset()."]}
- for key, value in configs:
- self.config[key][0] = value
+ self.config = {
+ 'PLACE_MARKER':
+ ["///Footnotes Go Here///",
+ "The text string that marks where the footnotes go"],
+ 'UNIQUE_IDS':
+ [False,
+ "Avoid name collisions across "
+ "multiple calls to reset()."],
+ "BACKLINK_TEXT":
+ ["&#8617;",
+ "The text string that links from the footnote "
+ "to the reader's place."],
+ "SUPERSCRIPT_TEXT":
+ ["{}",
+ "The text string that links from the reader's place "
+ "to the footnote."],
+ "BACKLINK_TITLE":
+ ["Jump back to footnote %d in the text",
+ "The text string used for the title HTML attribute "
+ "of the backlink. %d will be replaced by the "
+ "footnote number."],
+ "SEPARATOR":
+ [":",
+ "Footnote separator."]
+ }
+ super().__init__(**kwargs)
# In multiple invocations, emit links that don't get tangled.
self.unique_prefix = 0
+ self.found_refs = {}
+ self.used_refs = set()
self.reset()
- def extendMarkdown(self, md, md_globals):
+ def extendMarkdown(self, md):
""" Add pieces to Markdown. """
md.registerExtension(self)
self.parser = md.parser
- # Insert a preprocessor before ReferencePreprocessor
- md.preprocessors.add("footnote", FootnotePreprocessor(self),
- "<reference")
+ self.md = md
+ # Insert a blockprocessor before ReferencePreprocessor
+ md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)
+
# Insert an inline pattern before ImageReferencePattern
- FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
- md.inlinePatterns.add("footnote", FootnotePattern(FOOTNOTE_RE, self),
- "<reference")
+ FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
+ md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
# Insert a tree-processor that would actually add the footnote div
- # This must be before the inline treeprocessor so inline patterns
- # run on the contents of the div.
- md.treeprocessors.add("footnote", FootnoteTreeprocessor(self),
- "<inline")
- # Insert a postprocessor after amp_substitute oricessor
- md.postprocessors.add("footnote", FootnotePostprocessor(self),
- ">amp_substitute")
+ # This must be before all other treeprocessors (i.e., inline and
+ # codehilite) so they can run on the the contents of the div.
+ md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)
+
+ # Insert a tree-processor that will run after inline is done.
+ # In this tree-processor we want to check our duplicate footnote tracker
+ # And add additional backrefs to the footnote pointing back to the
+ # duplicated references.
+ md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)
+
+ # Insert a postprocessor after amp_substitute processor
+ md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
def reset(self):
- """ Clear the footnotes on reset, and prepare for a distinct document. """
- self.footnotes = markdown.odict.OrderedDict()
+ """ Clear footnotes on reset, and prepare for distinct document. """
+ self.footnotes = OrderedDict()
self.unique_prefix += 1
+ self.found_refs = {}
+ self.used_refs = set()
+
+ def unique_ref(self, reference, found=False):
+ """ Get a unique reference if there are duplicates. """
+ if not found:
+ return reference
+
+ original_ref = reference
+ while reference in self.used_refs:
+ ref, rest = reference.split(self.get_separator(), 1)
+ m = RE_REF_ID.match(ref)
+ if m:
+ reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, self.get_separator(), rest)
+ else:
+ reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest)
+
+ self.used_refs.add(reference)
+ if original_ref in self.found_refs:
+ self.found_refs[original_ref] += 1
+ else:
+ self.found_refs[original_ref] = 1
+ return reference
def findFootnotesPlaceholder(self, root):
""" Return ElementTree Element that contains Footnote placeholder. """
@@ -83,13 +128,15 @@ class FootnoteExtension(markdown.Extension):
for child in element:
if child.text:
if child.text.find(self.getConfig("PLACE_MARKER")) > -1:
- return child, True
+ return child, element, True
if child.tail:
if child.tail.find(self.getConfig("PLACE_MARKER")) > -1:
- return (child, element), False
- finder(child)
+ return child, element, False
+ child_res = finder(child)
+ if child_res is not None:
+ return child_res
return None
-
+
res = finder(root)
return res
@@ -97,43 +144,59 @@ class FootnoteExtension(markdown.Extension):
""" Store a footnote for later retrieval. """
self.footnotes[id] = text
+ def get_separator(self):
+ """ Get the footnote separator. """
+ return self.getConfig("SEPARATOR")
+
def makeFootnoteId(self, id):
""" Return footnote link id. """
if self.getConfig("UNIQUE_IDS"):
- return 'fn:%d-%s' % (self.unique_prefix, id)
+ return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
else:
- return 'fn:%s' % id
+ return 'fn{}{}'.format(self.get_separator(), id)
- def makeFootnoteRefId(self, id):
+ def makeFootnoteRefId(self, id, found=False):
""" Return footnote back-link id. """
if self.getConfig("UNIQUE_IDS"):
- return 'fnref:%d-%s' % (self.unique_prefix, id)
+ return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
else:
- return 'fnref:%s' % id
+ return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)
def makeFootnotesDiv(self, root):
""" Return div of footnotes as et Element. """
- if not self.footnotes.keys():
+ if not list(self.footnotes.keys()):
return None
div = etree.Element("div")
div.set('class', 'footnote')
- hr = etree.SubElement(div, "hr")
+ etree.SubElement(div, "hr")
ol = etree.SubElement(div, "ol")
+ surrogate_parent = etree.Element("div")
+
+ # Backward compatibility with old '%d' placeholder
+ backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}")
- for id in self.footnotes.keys():
+ for index, id in enumerate(self.footnotes.keys(), start=1):
li = etree.SubElement(ol, "li")
li.set("id", self.makeFootnoteId(id))
- self.parser.parseChunk(li, self.footnotes[id])
+ # Parse footnote with surrogate parent as li cannot be used.
+ # List block handlers have special logic to deal with li.
+ # When we are done parsing, we will copy everything over to li.
+ self.parser.parseChunk(surrogate_parent, self.footnotes[id])
+ for el in list(surrogate_parent):
+ li.append(el)
+ surrogate_parent.remove(el)
backlink = etree.Element("a")
backlink.set("href", "#" + self.makeFootnoteRefId(id))
- backlink.set("rev", "footnote")
- backlink.set("title", "Jump back to footnote %d in the text" % \
- (self.footnotes.index(id)+1))
+ backlink.set("class", "footnote-backref")
+ backlink.set(
+ "title",
+ backlink_title.format(index)
+ )
backlink.text = FN_BACKLINK_TEXT
- if li.getchildren():
+ if len(li):
node = li[-1]
if node.tag == "p":
node.text = node.text + NBSP_PLACEHOLDER
@@ -144,164 +207,205 @@ class FootnoteExtension(markdown.Extension):
return div
-class FootnotePreprocessor(markdown.preprocessors.Preprocessor):
+class FootnoteBlockProcessor(BlockProcessor):
""" Find all footnote references and store for later use. """
- def __init__ (self, footnotes):
- self.footnotes = footnotes
-
- def run(self, lines):
- lines = self._handleFootnoteDefinitions(lines)
- text = "\n".join(lines)
- return text.split("\n")
+ RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)
- def _handleFootnoteDefinitions(self, lines):
- """
- Recursively find all footnote definitions in lines.
-
- Keywords:
+ def __init__(self, footnotes):
+ super().__init__(footnotes.parser)
+ self.footnotes = footnotes
- * lines: A list of lines of text
-
- Return: A list of lines with footnote definitions removed.
-
- """
- i, id, footnote = self._findFootnoteDefinition(lines)
-
- if id :
- plain = lines[:i]
- detabbed, theRest = self.detectTabbed(lines[i+1:])
- self.footnotes.setFootnote(id,
- footnote + "\n"
- + "\n".join(detabbed))
- more_plain = self._handleFootnoteDefinitions(theRest)
- return plain + [""] + more_plain
- else :
- return lines
-
- def _findFootnoteDefinition(self, lines):
- """
- Find the parts of a footnote definition.
+ def test(self, parent, block):
+ return True
+
+ def run(self, parent, blocks):
+ """ Find, set, and remove footnote definitions. """
+ block = blocks.pop(0)
+ m = self.RE.search(block)
+ if m:
+ id = m.group(1)
+ fn_blocks = [m.group(2)]
+
+ # Handle rest of block
+ therest = block[m.end():].lstrip('\n')
+ m2 = self.RE.search(therest)
+ if m2:
+ # Another footnote exists in the rest of this block.
+ # Any content before match is continuation of this footnote, which may be lazily indented.
+ before = therest[:m2.start()].rstrip('\n')
+ fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
+ # Add back to blocks everything from beginning of match forward for next iteration.
+ blocks.insert(0, therest[m2.start():])
+ else:
+ # All remaining lines of block are continuation of this footnote, which may be lazily indented.
+ fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')
- Keywords:
+ # Check for child elements in remaining blocks.
+ fn_blocks.extend(self.detectTabbed(blocks))
- * lines: A list of lines of text.
+ footnote = "\n\n".join(fn_blocks)
+ self.footnotes.setFootnote(id, footnote.rstrip())
- Return: A three item tuple containing the index of the first line of a
- footnote definition, the id of the definition and the body of the
- definition.
-
- """
- counter = 0
- for line in lines:
- m = DEF_RE.match(line)
- if m:
- return counter, m.group(2), m.group(3)
- counter += 1
- return counter, None, None
+ if block[:m.start()].strip():
+ # Add any content before match back to blocks as separate block
+ blocks.insert(0, block[:m.start()].rstrip('\n'))
+ return True
+ # No match. Restore block.
+ blocks.insert(0, block)
+ return False
- def detectTabbed(self, lines):
+ def detectTabbed(self, blocks):
""" Find indented text and remove indent before further proccesing.
- Keyword arguments:
-
- * lines: an array of strings
-
- Returns: a list of post processed items and the unused
- remainder of the original list
-
+ Returns: a list of blocks with indentation removed.
"""
- items = []
- item = -1
- i = 0 # to keep track of where we are
-
- def detab(line):
- match = TABBED_RE.match(line)
- if match:
- return match.group(4)
-
- for line in lines:
- if line.strip(): # Non-blank line
- line = detab(line)
- if line:
- items.append(line)
- i += 1
- continue
- else:
- return items, lines[i:]
-
- else: # Blank line: _maybe_ we are done.
- i += 1 # advance
-
- # Find the next non-blank line
- for j in range(i, len(lines)):
- if lines[j].strip():
- next_line = lines[j]; break
+ fn_blocks = []
+ while blocks:
+ if blocks[0].startswith(' '*4):
+ block = blocks.pop(0)
+ # Check for new footnotes within this block and split at new footnote.
+ m = self.RE.search(block)
+ if m:
+ # Another footnote exists in this block.
+ # Any content before match is continuation of this footnote, which may be lazily indented.
+ before = block[:m.start()].rstrip('\n')
+ fn_blocks.append(self.detab(before))
+ # Add back to blocks everything from beginning of match forward for next iteration.
+ blocks.insert(0, block[m.start():])
+ # End of this footnote.
+ break
else:
- break # There is no more text; we are done.
+ # Entire block is part of this footnote.
+ fn_blocks.append(self.detab(block))
+ else:
+ # End of this footnote.
+ break
+ return fn_blocks
- # Check if the next non-blank line is tabbed
- if detab(next_line): # Yes, more work to do.
- items.append("")
- continue
- else:
- break # No, we are done.
- else:
- i += 1
+ def detab(self, block):
+ """ Remove one level of indent from a block.
- return items, lines[i:]
+ Preserve lazily indented blocks by only removing indent from indented lines.
+ """
+ lines = block.split('\n')
+ for i, line in enumerate(lines):
+ if line.startswith(' '*4):
+ lines[i] = line[4:]
+ return '\n'.join(lines)
-class FootnotePattern(markdown.inlinepatterns.Pattern):
+class FootnoteInlineProcessor(InlineProcessor):
""" InlinePattern for footnote markers in a document's body text. """
def __init__(self, pattern, footnotes):
- markdown.inlinepatterns.Pattern.__init__(self, pattern)
+ super().__init__(pattern)
self.footnotes = footnotes
- def handleMatch(self, m):
- sup = etree.Element("sup")
- a = etree.SubElement(sup, "a")
- id = m.group(2)
- sup.set('id', self.footnotes.makeFootnoteRefId(id))
- a.set('href', '#' + self.footnotes.makeFootnoteId(id))
- a.set('rel', 'footnote')
- a.text = str(self.footnotes.footnotes.index(id) + 1)
- return sup
+ def handleMatch(self, m, data):
+ id = m.group(1)
+ if id in self.footnotes.footnotes.keys():
+ sup = etree.Element("sup")
+ a = etree.SubElement(sup, "a")
+ sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True))
+ a.set('href', '#' + self.footnotes.makeFootnoteId(id))
+ a.set('class', 'footnote-ref')
+ a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(
+ list(self.footnotes.footnotes.keys()).index(id) + 1
+ )
+ return sup, m.start(0), m.end(0)
+ else:
+ return None, None, None
+
+
+class FootnotePostTreeprocessor(Treeprocessor):
+ """ Amend footnote div with duplicates. """
+
+ def __init__(self, footnotes):
+ self.footnotes = footnotes
+ def add_duplicates(self, li, duplicates):
+ """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """
+ for link in li.iter('a'):
+ # Find the link that needs to be duplicated.
+ if link.attrib.get('class', '') == 'footnote-backref':
+ ref, rest = link.attrib['href'].split(self.footnotes.get_separator(), 1)
+ # Duplicate link the number of times we need to
+ # and point the to the appropriate references.
+ links = []
+ for index in range(2, duplicates + 1):
+ sib_link = copy.deepcopy(link)
+ sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, self.footnotes.get_separator(), rest)
+ links.append(sib_link)
+ self.offset += 1
+ # Add all the new duplicate links.
+ el = list(li)[-1]
+ for link in links:
+ el.append(link)
+ break
+
+ def get_num_duplicates(self, li):
+ """ Get the number of duplicate refs of the footnote. """
+ fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1)
+ link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
+ return self.footnotes.found_refs.get(link_id, 0)
+
+ def handle_duplicates(self, parent):
+ """ Find duplicate footnotes and format and add the duplicates. """
+ for li in list(parent):
+ # Check number of duplicates footnotes and insert
+ # additional links if needed.
+ count = self.get_num_duplicates(li)
+ if count > 1:
+ self.add_duplicates(li, count)
-class FootnoteTreeprocessor(markdown.treeprocessors.Treeprocessor):
+ def run(self, root):
+ """ Crawl the footnote div and add missing duplicate footnotes. """
+ self.offset = 0
+ for div in root.iter('div'):
+ if div.attrib.get('class', '') == 'footnote':
+ # Footnotes should be under the first ordered list under
+ # the footnote div. So once we find it, quit.
+ for ol in div.iter('ol'):
+ self.handle_duplicates(ol)
+ break
+
+
+class FootnoteTreeprocessor(Treeprocessor):
""" Build and append footnote div to end of document. """
- def __init__ (self, footnotes):
+ def __init__(self, footnotes):
self.footnotes = footnotes
def run(self, root):
footnotesDiv = self.footnotes.makeFootnotesDiv(root)
- if footnotesDiv:
+ if footnotesDiv is not None:
result = self.footnotes.findFootnotesPlaceholder(root)
if result:
- node, isText = result
+ child, parent, isText = result
+ ind = list(parent).index(child)
if isText:
- node.text = None
- node.getchildren().insert(0, footnotesDiv)
+ parent.remove(child)
+ parent.insert(ind, footnotesDiv)
else:
- child, element = node
- ind = element.getchildren().find(child)
- element.getchildren().insert(ind + 1, footnotesDiv)
+ parent.insert(ind + 1, footnotesDiv)
child.tail = None
- fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
else:
root.append(footnotesDiv)
-class FootnotePostprocessor(markdown.postprocessors.Postprocessor):
+
+class FootnotePostprocessor(Postprocessor):
""" Replace placeholders with html entities. """
+ def __init__(self, footnotes):
+ self.footnotes = footnotes
def run(self, text):
- text = text.replace(FN_BACKLINK_TEXT, "&#8617;")
+ text = text.replace(
+ FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
+ )
return text.replace(NBSP_PLACEHOLDER, "&#160;")
-def makeExtension(configs=[]):
- """ Return an instance of the FootnoteExtension """
- return FootnoteExtension(configs=configs)
+def makeExtension(**kwargs): # pragma: no cover
+ """ Return an instance of the FootnoteExtension """
+ return FootnoteExtension(**kwargs)