diff options
Diffstat (limited to 'markdown/extensions/smarty.py')
-rw-r--r-- | markdown/extensions/smarty.py | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py new file mode 100644 index 0000000..c4bfd58 --- /dev/null +++ b/markdown/extensions/smarty.py @@ -0,0 +1,257 @@ +''' +Smarty extension for Python-Markdown +==================================== + +Adds conversion of ASCII dashes, quotes and ellipses to their HTML +entity equivalents. + +See <https://Python-Markdown.github.io/extensions/smarty> +for documentation. + +Author: 2013, Dmitry Shachnev <mitya57@gmail.com> + +All changes Copyright 2013-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +SmartyPants license: + + Copyright (c) 2003 John Gruber <https://daringfireball.net/> + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + +smartypants.py license: + + smartypants.py is a derivative work of SmartyPants. + Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + +''' + + +from . import Extension +from ..inlinepatterns import HtmlInlineProcessor, HTML_RE +from ..treeprocessors import InlineProcessor +from ..util import Registry + + +# Constants for quote education. +punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" +endOfWordClass = r"[\s.,;:!?)]" +closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" + +openingQuotesBase = ( + r'(\s' # a whitespace char + r'| ' # or a non-breaking space entity + r'|--' # or dashes + r'|–|—' # or unicode + r'|&[mn]dash;' # or named dash entities + r'|–|—' # or decimal entities + r')' +) + +substitutions = { + 'mdash': '—', + 'ndash': '–', + 'ellipsis': '…', + 'left-angle-quote': '«', + 'right-angle-quote': '»', + 'left-single-quote': '‘', + 'right-single-quote': '’', + 'left-double-quote': '“', + 'right-double-quote': '”', +} + + +# Special case if the very first character is a quote +# followed by punctuation at a non-word-break. Close the quotes by brute force: +singleQuoteStartRe = r"^'(?=%s\B)" % punctClass +doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass + +# Special case for double sets of quotes, e.g.: +# <p>He said, "'Quoted' words in a larger quote."</p> +doubleQuoteSetsRe = r""""'(?=\w)""" +singleQuoteSetsRe = r"""'"(?=\w)""" + +# Special case for decade abbreviations (the '80s): +decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)" + +# Get most opening double quotes: +openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase + +# Double closing quotes: +closingDoubleQuotesRegex = r'"(?=\s)' +closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass + +# Get most opening single quotes: +openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase + +# Single closing quotes: +closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass +closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass + +# All remaining quotes should be opening ones +remainingSingleQuotesRegex = r"'" +remainingDoubleQuotesRegex = r'"' + +HTML_STRICT_RE = HTML_RE + r'(?!\>)' + + +class SubstituteTextPattern(HtmlInlineProcessor): + def __init__(self, pattern, replace, md): + """ Replaces matches with some text. """ + HtmlInlineProcessor.__init__(self, pattern) + self.replace = replace + self.md = md + + def handleMatch(self, m, data): + result = '' + for part in self.replace: + if isinstance(part, int): + result += m.group(part) + else: + result += self.md.htmlStash.store(part) + return result, m.start(0), m.end(0) + + +class SmartyExtension(Extension): + def __init__(self, **kwargs): + self.config = { + 'smart_quotes': [True, 'Educate quotes'], + 'smart_angled_quotes': [False, 'Educate angled quotes'], + 'smart_dashes': [True, 'Educate dashes'], + 'smart_ellipses': [True, 'Educate ellipses'], + 'substitutions': [{}, 'Overwrite default substitutions'], + } + super().__init__(**kwargs) + self.substitutions = dict(substitutions) + self.substitutions.update(self.getConfig('substitutions', default={})) + + def _addPatterns(self, md, patterns, serie, priority): + for ind, pattern in enumerate(patterns): + pattern += (md,) + pattern = SubstituteTextPattern(*pattern) + name = 'smarty-%s-%d' % (serie, ind) + self.inlinePatterns.register(pattern, name, priority-ind) + + def educateDashes(self, md): + emDashesPattern = SubstituteTextPattern( + r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md + ) + enDashesPattern = SubstituteTextPattern( + r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md + ) + self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50) + self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45) + + def educateEllipses(self, md): + ellipsesPattern = SubstituteTextPattern( + r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md + ) + self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10) + + def educateAngledQuotes(self, md): + leftAngledQuotePattern = SubstituteTextPattern( + r'\<\<', (self.substitutions['left-angle-quote'],), md + ) + rightAngledQuotePattern = SubstituteTextPattern( + r'\>\>', (self.substitutions['right-angle-quote'],), md + ) + self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) + self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) + + def educateQuotes(self, md): + lsquo = self.substitutions['left-single-quote'] + rsquo = self.substitutions['right-single-quote'] + ldquo = self.substitutions['left-double-quote'] + rdquo = self.substitutions['right-double-quote'] + patterns = ( + (singleQuoteStartRe, (rsquo,)), + (doubleQuoteStartRe, (rdquo,)), + (doubleQuoteSetsRe, (ldquo + lsquo,)), + (singleQuoteSetsRe, (lsquo + ldquo,)), + (decadeAbbrRe, (rsquo,)), + (openingSingleQuotesRegex, (1, lsquo)), + (closingSingleQuotesRegex, (rsquo,)), + (closingSingleQuotesRegex2, (rsquo, 1)), + (remainingSingleQuotesRegex, (lsquo,)), + (openingDoubleQuotesRegex, (1, ldquo)), + (closingDoubleQuotesRegex, (rdquo,)), + (closingDoubleQuotesRegex2, (rdquo,)), + (remainingDoubleQuotesRegex, (ldquo,)) + ) + self._addPatterns(md, patterns, 'quotes', 30) + + def extendMarkdown(self, md): + configs = self.getConfigs() + self.inlinePatterns = Registry() + if configs['smart_ellipses']: + self.educateEllipses(md) + if configs['smart_quotes']: + self.educateQuotes(md) + if configs['smart_angled_quotes']: + self.educateAngledQuotes(md) + # Override HTML_RE from inlinepatterns.py so that it does not + # process tags with duplicate closing quotes. + md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90) + if configs['smart_dashes']: + self.educateDashes(md) + inlineProcessor = InlineProcessor(md) + inlineProcessor.inlinePatterns = self.inlinePatterns + md.treeprocessors.register(inlineProcessor, 'smarty', 2) + md.ESCAPED_CHARS.extend(['"', "'"]) + + +def makeExtension(**kwargs): # pragma: no cover + return SmartyExtension(**kwargs) |