1 files changed, 289 insertions, 185 deletions
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
index e1a9cda..96ed5c2 100644
--- a/markdown/extensions/footnotes.py
+++ b/markdown/extensions/footnotes.py
@@ -1,81 +1,126 @@
 """
-========================= FOOTNOTES =================================
+Footnotes Extension for Python-Markdown
+=======================================
 
-This section adds footnote handling to markdown.  It can be used as
-an example for extending python-markdown with relatively complex
-functionality.  While in this case the extension is included inside
-the module itself, it could just as easily be added from outside the
-module.  Not that all markdown classes above are ignorant about
-footnotes.  All footnote functionality is provided separately and
-then added to the markdown instance at the run time.
+Adds footnote handling to Python-Markdown.
 
-Footnote functionality is attached by calling extendMarkdown()
-method of FootnoteExtension.  The method also registers the
-extension to allow it's state to be reset by a call to reset()
-method.
+See <https://Python-Markdown.github.io/extensions/footnotes>
+for documentation.
 
-Example:
-    Footnotes[^1] have a label[^label] and a definition[^!DEF].
+Copyright The Python Markdown Project
 
-    [^1]: This is a footnote
-    [^label]: A footnote on "label"
-    [^!DEF]: The footnote for definition
+License: [BSD](https://opensource.org/licenses/bsd-license.php)
 
 """
 
-import re, markdown
-from markdown import etree
+from . import Extension
+from ..blockprocessors import BlockProcessor
+from ..inlinepatterns import InlineProcessor
+from ..treeprocessors import Treeprocessor
+from ..postprocessors import Postprocessor
+from .. import util
+from collections import OrderedDict
+import re
+import copy
+import xml.etree.ElementTree as etree
 
-FN_BACKLINK_TEXT = "zz1337820767766393qq"
-NBSP_PLACEHOLDER =  "qq3936677670287331zz"
-DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
-TABBED_RE = re.compile(r'((\t)|(    ))(.*)')
+FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
+NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
+RE_REF_ID = re.compile(r'(fnref)(\d+)')
 
-class FootnoteExtension(markdown.Extension):
+
+class FootnoteExtension(Extension):
     """ Footnote Extension. """
 
-    def __init__ (self, configs):
+    def __init__(self, **kwargs):
         """ Setup configs. """
-        self.config = {'PLACE_MARKER':
-                       ["///Footnotes Go Here///",
-                        "The text string that marks where the footnotes go"],
-                       'UNIQUE_IDS':
-                       [False,
-                        "Avoid name collisions across "
-                        "multiple calls to reset()."]}
 
-        for key, value in configs:
-            self.config[key][0] = value
+        self.config = {
+            'PLACE_MARKER':
+                ["///Footnotes Go Here///",
+                 "The text string that marks where the footnotes go"],
+            'UNIQUE_IDS':
+                [False,
+                 "Avoid name collisions across "
+                 "multiple calls to reset()."],
+            "BACKLINK_TEXT":
+                ["&#8617;",
+                 "The text string that links from the footnote "
+                 "to the reader's place."],
+            "SUPERSCRIPT_TEXT":
+                ["{}",
+                 "The text string that links from the reader's place "
+                 "to the footnote."],
+            "BACKLINK_TITLE":
+                ["Jump back to footnote %d in the text",
+                 "The text string used for the title HTML attribute "
+                 "of the backlink. %d will be replaced by the "
+                 "footnote number."],
+            "SEPARATOR":
+                [":",
+                 "Footnote separator."]
+        }
+        super().__init__(**kwargs)
 
         # In multiple invocations, emit links that don't get tangled.
         self.unique_prefix = 0
+        self.found_refs = {}
+        self.used_refs = set()
 
         self.reset()
 
-    def extendMarkdown(self, md, md_globals):
+    def extendMarkdown(self, md):
         """ Add pieces to Markdown. """
         md.registerExtension(self)
         self.parser = md.parser
-        # Insert a preprocessor before ReferencePreprocessor
-        md.preprocessors.add("footnote", FootnotePreprocessor(self),
-                             "<reference")
+        self.md = md
+        # Insert a blockprocessor before ReferencePreprocessor
+        md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)
+
         # Insert an inline pattern before ImageReferencePattern
-        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
-        md.inlinePatterns.add("footnote", FootnotePattern(FOOTNOTE_RE, self),
-                              "<reference")
+        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
+        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
         # Insert a tree-processor that would actually add the footnote div
-        # This must be before the inline treeprocessor so inline patterns
-        # run on the contents of the div.
-        md.treeprocessors.add("footnote", FootnoteTreeprocessor(self),
-                                 "<inline")
-        # Insert a postprocessor after amp_substitute oricessor
-        md.postprocessors.add("footnote", FootnotePostprocessor(self),
-                                  ">amp_substitute")
+        # This must be before all other treeprocessors (i.e., inline and
+        # codehilite) so they can run on the contents of the div.
+        md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)
+
+        # Insert a tree-processor that will run after inline is done.
+        # In this tree-processor we want to check our duplicate footnote tracker
+        # and add additional backrefs to the footnote pointing back to the
+        # duplicated references.
+        md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)
+
+        # Insert a postprocessor after amp_substitute processor
+        md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
 
     def reset(self):
-        """ Clear the footnotes on reset, and prepare for a distinct document. """
-        self.footnotes = markdown.odict.OrderedDict()
+        """ Clear footnotes on reset, and prepare for distinct document. """
+        self.footnotes = OrderedDict()
         self.unique_prefix += 1
+        self.found_refs = {}
+        self.used_refs = set()
+
+    def unique_ref(self, reference, found=False):
+        """ Get a unique reference if there are duplicates. """
+        if not found:
+            return reference
+
+        original_ref = reference
+        while reference in self.used_refs:
+            ref, rest = reference.split(self.get_separator(), 1)
+            m = RE_REF_ID.match(ref)
+            if m:
+                reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, self.get_separator(), rest)
+            else:
+                reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest)
+
+        self.used_refs.add(reference)
+        if original_ref in self.found_refs:
+            self.found_refs[original_ref] += 1
+        else:
+            self.found_refs[original_ref] = 1
+        return reference
 
     def findFootnotesPlaceholder(self, root):
         """ Return ElementTree Element that contains Footnote placeholder. """
@@ -83,13 +128,15 @@ class FootnoteExtension(markdown.Extension):
             for child in element:
                 if child.text:
                     if child.text.find(self.getConfig("PLACE_MARKER")) > -1:
-                        return child, True
+                        return child, element, True
                 if child.tail:
                     if child.tail.find(self.getConfig("PLACE_MARKER")) > -1:
-                        return (child, element), False
-                finder(child)
+                        return child, element, False
+                child_res = finder(child)
+                if child_res is not None:
+                    return child_res
             return None
-                
+
         res = finder(root)
         return res
 
@@ -97,43 +144,59 @@ class FootnoteExtension(markdown.Extension):
         """ Store a footnote for later retrieval. """
         self.footnotes[id] = text
 
+    def get_separator(self):
+        """ Get the footnote separator. """
+        return self.getConfig("SEPARATOR")
+
     def makeFootnoteId(self, id):
         """ Return footnote link id. """
         if self.getConfig("UNIQUE_IDS"):
-            return 'fn:%d-%s' % (self.unique_prefix, id)
+            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
         else:
-            return 'fn:%s' % id
+            return 'fn{}{}'.format(self.get_separator(), id)
 
-    def makeFootnoteRefId(self, id):
+    def makeFootnoteRefId(self, id, found=False):
         """ Return footnote back-link id. """
         if self.getConfig("UNIQUE_IDS"):
-            return 'fnref:%d-%s' % (self.unique_prefix, id)
+            return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
         else:
-            return 'fnref:%s' % id
+            return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)
 
     def makeFootnotesDiv(self, root):
         """ Return div of footnotes as et Element. """
 
-        if not self.footnotes.keys():
+        if not list(self.footnotes.keys()):
             return None
 
         div = etree.Element("div")
         div.set('class', 'footnote')
-        hr = etree.SubElement(div, "hr")
+        etree.SubElement(div, "hr")
         ol = etree.SubElement(div, "ol")
+        surrogate_parent = etree.Element("div")
+
+        # Backward compatibility with old '%d' placeholder
+        backlink_title = self.getConfig("BACKLINK_TITLE").replace("%d", "{}")
 
-        for id in self.footnotes.keys():
+        for index, id in enumerate(self.footnotes.keys(), start=1):
             li = etree.SubElement(ol, "li")
             li.set("id", self.makeFootnoteId(id))
-            self.parser.parseChunk(li, self.footnotes[id])
+            # Parse footnote with surrogate parent as li cannot be used.
+            # List block handlers have special logic to deal with li.
+            # When we are done parsing, we will copy everything over to li.
+            self.parser.parseChunk(surrogate_parent, self.footnotes[id])
+            for el in list(surrogate_parent):
+                li.append(el)
+                surrogate_parent.remove(el)
             backlink = etree.Element("a")
             backlink.set("href", "#" + self.makeFootnoteRefId(id))
-            backlink.set("rev", "footnote")
-            backlink.set("title", "Jump back to footnote %d in the text" % \
-                            (self.footnotes.index(id)+1))
+            backlink.set("class", "footnote-backref")
+            backlink.set(
+                "title",
+                backlink_title.format(index)
+            )
             backlink.text = FN_BACKLINK_TEXT
 
-            if li.getchildren():
+            if len(li):
                 node = li[-1]
                 if node.tag == "p":
                     node.text = node.text + NBSP_PLACEHOLDER
@@ -144,164 +207,205 @@ class FootnoteExtension(markdown.Extension):
         return div
 
 
-class FootnotePreprocessor(markdown.preprocessors.Preprocessor):
+class FootnoteBlockProcessor(BlockProcessor):
     """ Find all footnote references and store for later use. """
 
-    def __init__ (self, footnotes):
-        self.footnotes = footnotes
-
-    def run(self, lines):
-        lines = self._handleFootnoteDefinitions(lines)
-        text = "\n".join(lines)
-        return text.split("\n")
+    RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)
 
-    def _handleFootnoteDefinitions(self, lines):
-        """
-        Recursively find all footnote definitions in lines.
-
-        Keywords:
+    def __init__(self, footnotes):
+        super().__init__(footnotes.parser)
+        self.footnotes = footnotes
 
-        * lines: A list of lines of text
-        
-        Return: A list of lines with footnote definitions removed.
-        
-        """
-        i, id, footnote = self._findFootnoteDefinition(lines)
-
-        if id :
-            plain = lines[:i]
-            detabbed, theRest = self.detectTabbed(lines[i+1:])
-            self.footnotes.setFootnote(id,
-                                       footnote + "\n"
-                                       + "\n".join(detabbed))
-            more_plain = self._handleFootnoteDefinitions(theRest)
-            return plain + [""] + more_plain
-        else :
-            return lines
-
-    def _findFootnoteDefinition(self, lines):
-        """
-        Find the parts of a footnote definition.
+    def test(self, parent, block):
+        return True
+
+    def run(self, parent, blocks):
+        """ Find, set, and remove footnote definitions. """
+        block = blocks.pop(0)
+        m = self.RE.search(block)
+        if m:
+            id = m.group(1)
+            fn_blocks = [m.group(2)]
+
+            # Handle rest of block
+            therest = block[m.end():].lstrip('\n')
+            m2 = self.RE.search(therest)
+            if m2:
+                # Another footnote exists in the rest of this block.
+                # Any content before match is continuation of this footnote, which may be lazily indented.
+                before = therest[:m2.start()].rstrip('\n')
+                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
+                # Add back to blocks everything from beginning of match forward for next iteration.
+                blocks.insert(0, therest[m2.start():])
+            else:
+                # All remaining lines of block are continuation of this footnote, which may be lazily indented.
+                fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')
 
-        Keywords:
+                # Check for child elements in remaining blocks.
+                fn_blocks.extend(self.detectTabbed(blocks))
 
-        * lines: A list of lines of text.
+            footnote = "\n\n".join(fn_blocks)
+            self.footnotes.setFootnote(id, footnote.rstrip())
 
-        Return: A three item tuple containing the index of the first line of a
-        footnote definition, the id of the definition and the body of the 
-        definition.
-        
-        """
-        counter = 0
-        for line in lines:
-            m = DEF_RE.match(line)
-            if m:
-                return counter, m.group(2), m.group(3)
-            counter += 1
-        return counter, None, None
+            if block[:m.start()].strip():
+                # Add any content before match back to blocks as separate block
+                blocks.insert(0, block[:m.start()].rstrip('\n'))
+            return True
+        # No match. Restore block.
+        blocks.insert(0, block)
+        return False
 
-    def detectTabbed(self, lines):
+    def detectTabbed(self, blocks):
         """ Find indented text and remove indent before further proccesing.
 
-        Keyword arguments:
-
-        * lines: an array of strings
-
-        Returns: a list of post processed items and the unused
-        remainder of the original list
-
+        Returns: a list of blocks with indentation removed.
         """
-        items = []
-        item = -1
-        i = 0 # to keep track of where we are
-
-        def detab(line):
-            match = TABBED_RE.match(line)
-            if match:
-               return match.group(4)
-
-        for line in lines:
-            if line.strip(): # Non-blank line
-                line = detab(line)
-                if line:
-                    items.append(line)
-                    i += 1
-                    continue
-                else:
-                    return items, lines[i:]
-
-            else: # Blank line: _maybe_ we are done.
-                i += 1 # advance
-
-                # Find the next non-blank line
-                for j in range(i, len(lines)):
-                    if lines[j].strip():
-                        next_line = lines[j]; break
+        fn_blocks = []
+        while blocks:
+            if blocks[0].startswith(' '*4):
+                block = blocks.pop(0)
+                # Check for new footnotes within this block and split at new footnote.
+                m = self.RE.search(block)
+                if m:
+                    # Another footnote exists in this block.
+                    # Any content before match is continuation of this footnote, which may be lazily indented.
+                    before = block[:m.start()].rstrip('\n')
+                    fn_blocks.append(self.detab(before))
+                    # Add back to blocks everything from beginning of match forward for next iteration.
+                    blocks.insert(0, block[m.start():])
+                    # End of this footnote.
+                    break
                 else:
-                    break # There is no more text; we are done.
+                    # Entire block is part of this footnote.
+                    fn_blocks.append(self.detab(block))
+            else:
+                # End of this footnote.
+                break
+        return fn_blocks
 
-                # Check if the next non-blank line is tabbed
-                if detab(next_line): # Yes, more work to do.
-                    items.append("")
-                    continue
-                else:
-                    break # No, we are done.
-        else:
-            i += 1
+    def detab(self, block):
+        """ Remove one level of indent from a block.
 
-        return items, lines[i:]
+        Preserve lazily indented blocks by only removing indent from indented lines.
+        """
+        lines = block.split('\n')
+        for i, line in enumerate(lines):
+            if line.startswith(' '*4):
+                lines[i] = line[4:]
+        return '\n'.join(lines)
 
 
-class FootnotePattern(markdown.inlinepatterns.Pattern):
+class FootnoteInlineProcessor(InlineProcessor):
     """ InlinePattern for footnote markers in a document's body text. """
 
     def __init__(self, pattern, footnotes):
-        markdown.inlinepatterns.Pattern.__init__(self, pattern)
+        super().__init__(pattern)
         self.footnotes = footnotes
 
-    def handleMatch(self, m):
-        sup = etree.Element("sup")
-        a = etree.SubElement(sup, "a")
-        id = m.group(2)
-        sup.set('id', self.footnotes.makeFootnoteRefId(id))
-        a.set('href', '#' + self.footnotes.makeFootnoteId(id))
-        a.set('rel', 'footnote')
-        a.text = str(self.footnotes.footnotes.index(id) + 1)
-        return sup
+    def handleMatch(self, m, data):
+        id = m.group(1)
+        if id in self.footnotes.footnotes.keys():
+            sup = etree.Element("sup")
+            a = etree.SubElement(sup, "a")
+            sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True))
+            a.set('href', '#' + self.footnotes.makeFootnoteId(id))
+            a.set('class', 'footnote-ref')
+            a.text = self.footnotes.getConfig("SUPERSCRIPT_TEXT").format(
+                list(self.footnotes.footnotes.keys()).index(id) + 1
+            )
+            return sup, m.start(0), m.end(0)
+        else:
+            return None, None, None
+
+
+class FootnotePostTreeprocessor(Treeprocessor):
+    """ Amend footnote div with duplicates. """
+
+    def __init__(self, footnotes):
+        self.footnotes = footnotes
 
+    def add_duplicates(self, li, duplicates):
+        """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """
+        for link in li.iter('a'):
+            # Find the link that needs to be duplicated.
+            if link.attrib.get('class', '') == 'footnote-backref':
+                ref, rest = link.attrib['href'].split(self.footnotes.get_separator(), 1)
+                # Duplicate link the number of times we need to
+                # and point them to the appropriate references.
+                links = []
+                for index in range(2, duplicates + 1):
+                    sib_link = copy.deepcopy(link)
+                    sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, self.footnotes.get_separator(), rest)
+                    links.append(sib_link)
+                    self.offset += 1
+                # Add all the new duplicate links.
+                el = list(li)[-1]
+                for link in links:
+                    el.append(link)
+                break
+
+    def get_num_duplicates(self, li):
+        """ Get the number of duplicate refs of the footnote. """
+        fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1)
+        link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
+        return self.footnotes.found_refs.get(link_id, 0)
+
+    def handle_duplicates(self, parent):
+        """ Find duplicate footnotes and format and add the duplicates. """
+        for li in list(parent):
+            # Check the number of duplicate footnotes and insert
+            # additional links if needed.
+            count = self.get_num_duplicates(li)
+            if count > 1:
+                self.add_duplicates(li, count)
 
-class FootnoteTreeprocessor(markdown.treeprocessors.Treeprocessor):
+    def run(self, root):
+        """ Crawl the footnote div and add missing duplicate footnotes. """
+        self.offset = 0
+        for div in root.iter('div'):
+            if div.attrib.get('class', '') == 'footnote':
+                # Footnotes should be under the first ordered list under
+                # the footnote div.  So once we find it, quit.
+                for ol in div.iter('ol'):
+                    self.handle_duplicates(ol)
+                    break
+
+
+class FootnoteTreeprocessor(Treeprocessor):
     """ Build and append footnote div to end of document. """
 
-    def __init__ (self, footnotes):
+    def __init__(self, footnotes):
         self.footnotes = footnotes
 
     def run(self, root):
         footnotesDiv = self.footnotes.makeFootnotesDiv(root)
-        if footnotesDiv:
+        if footnotesDiv is not None:
             result = self.footnotes.findFootnotesPlaceholder(root)
             if result:
-                node, isText = result
+                child, parent, isText = result
+                ind = list(parent).index(child)
                 if isText:
-                    node.text = None
-                    node.getchildren().insert(0, footnotesDiv)
+                    parent.remove(child)
+                    parent.insert(ind, footnotesDiv)
                 else:
-                    child, element = node
-                    ind = element.getchildren().find(child)
-                    element.getchildren().insert(ind + 1, footnotesDiv)
+                    parent.insert(ind + 1, footnotesDiv)
                     child.tail = None
-                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
             else:
                 root.append(footnotesDiv)
 
-class FootnotePostprocessor(markdown.postprocessors.Postprocessor):
+
+class FootnotePostprocessor(Postprocessor):
     """ Replace placeholders with html entities. """
+    def __init__(self, footnotes):
+        self.footnotes = footnotes
 
     def run(self, text):
-        text = text.replace(FN_BACKLINK_TEXT, "&#8617;")
+        text = text.replace(
+            FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
+        )
         return text.replace(NBSP_PLACEHOLDER, "&#160;")
 
-def makeExtension(configs=[]):
-    """ Return an instance of the FootnoteExtension """
-    return FootnoteExtension(configs=configs)
 
+def makeExtension(**kwargs):  # pragma: no cover
+    """ Return an instance of the FootnoteExtension """
+    return FootnoteExtension(**kwargs)