aboutsummaryrefslogtreecommitdiff
path: root/setuptools/_vendor/jaraco/text/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools/_vendor/jaraco/text/__init__.py')
-rw-r--r--setuptools/_vendor/jaraco/text/__init__.py599
1 files changed, 599 insertions, 0 deletions
diff --git a/setuptools/_vendor/jaraco/text/__init__.py b/setuptools/_vendor/jaraco/text/__init__.py
new file mode 100644
index 0000000..a0306d5
--- /dev/null
+++ b/setuptools/_vendor/jaraco/text/__init__.py
@@ -0,0 +1,599 @@
+import re
+import itertools
+import textwrap
+import functools
+
+try:
+ from importlib.resources import files # type: ignore
+except ImportError: # pragma: nocover
+ from setuptools.extern.importlib_resources import files # type: ignore
+
+from setuptools.extern.jaraco.functools import compose, method_cache
+from setuptools.extern.jaraco.context import ExceptionTrap
+
+
+def substitution(old, new):
+ """
+ Return a function that will perform a substitution on a string
+ """
+ return lambda s: s.replace(old, new)
+
+
+def multi_substitution(*substitutions):
+ """
+ Take a sequence of pairs specifying substitutions, and create
+ a function that performs those substitutions.
+
+ >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
+ 'baz'
+ """
+ substitutions = itertools.starmap(substitution, substitutions)
+ # compose function applies last function first, so reverse the
+ # substitutions to get the expected order.
+ substitutions = reversed(tuple(substitutions))
+ return compose(*substitutions)
+
+
+class FoldedCase(str):
+ """
+ A case insensitive string class; behaves just like str
+ except compares equal when the only variation is case.
+
+ >>> s = FoldedCase('hello world')
+
+ >>> s == 'Hello World'
+ True
+
+ >>> 'Hello World' == s
+ True
+
+ >>> s != 'Hello World'
+ False
+
+ >>> s.index('O')
+ 4
+
+ >>> s.split('O')
+ ['hell', ' w', 'rld']
+
+ >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
+ ['alpha', 'Beta', 'GAMMA']
+
+ Sequence membership is straightforward.
+
+ >>> "Hello World" in [s]
+ True
+ >>> s in ["Hello World"]
+ True
+
+ You may test for set inclusion, but candidate and elements
+ must both be folded.
+
+ >>> FoldedCase("Hello World") in {s}
+ True
+ >>> s in {FoldedCase("Hello World")}
+ True
+
+ String inclusion works as long as the FoldedCase object
+ is on the right.
+
+ >>> "hello" in FoldedCase("Hello World")
+ True
+
+ But not if the FoldedCase object is on the left:
+
+ >>> FoldedCase('hello') in 'Hello World'
+ False
+
+ In that case, use ``in_``:
+
+ >>> FoldedCase('hello').in_('Hello World')
+ True
+
+ >>> FoldedCase('hello') > FoldedCase('Hello')
+ False
+ """
+
+ def __lt__(self, other):
+ return self.lower() < other.lower()
+
+ def __gt__(self, other):
+ return self.lower() > other.lower()
+
+ def __eq__(self, other):
+ return self.lower() == other.lower()
+
+ def __ne__(self, other):
+ return self.lower() != other.lower()
+
+ def __hash__(self):
+ return hash(self.lower())
+
+ def __contains__(self, other):
+ return super().lower().__contains__(other.lower())
+
+ def in_(self, other):
+ "Does self appear in other?"
+ return self in FoldedCase(other)
+
+ # cache lower since it's likely to be called frequently.
+ @method_cache
+ def lower(self):
+ return super().lower()
+
+ def index(self, sub):
+ return self.lower().index(sub.lower())
+
+ def split(self, splitter=' ', maxsplit=0):
+ pattern = re.compile(re.escape(splitter), re.I)
+ return pattern.split(self, maxsplit)
+
+
+# Python 3.8 compatibility
+_unicode_trap = ExceptionTrap(UnicodeDecodeError)
+
+
+@_unicode_trap.passes
+def is_decodable(value):
+ r"""
+ Return True if the supplied value is decodable (using the default
+ encoding).
+
+ >>> is_decodable(b'\xff')
+ False
+ >>> is_decodable(b'\x32')
+ True
+ """
+ value.decode()
+
+
+def is_binary(value):
+ r"""
+ Return True if the value appears to be binary (that is, it's a byte
+ string and isn't decodable).
+
+ >>> is_binary(b'\xff')
+ True
+ >>> is_binary('\xff')
+ False
+ """
+ return isinstance(value, bytes) and not is_decodable(value)
+
+
+def trim(s):
+ r"""
+ Trim something like a docstring to remove the whitespace that
+ is common due to indentation and formatting.
+
+ >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
+ 'foo = bar\n\tbar = baz'
+ """
+ return textwrap.dedent(s).strip()
+
+
+def wrap(s):
+ """
+ Wrap lines of text, retaining existing newlines as
+ paragraph markers.
+
+ >>> print(wrap(lorem_ipsum))
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
+ eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
+ minim veniam, quis nostrud exercitation ullamco laboris nisi ut
+ aliquip ex ea commodo consequat. Duis aute irure dolor in
+ reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
+ pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+ culpa qui officia deserunt mollit anim id est laborum.
+ <BLANKLINE>
+ Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam
+ varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus
+ magna felis sollicitudin mauris. Integer in mauris eu nibh euismod
+ gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis
+ risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue,
+ eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas
+ fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla
+ a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis,
+ neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing
+ sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque
+ nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus
+ quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis,
+ molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
+ """
+ paragraphs = s.splitlines()
+ wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs)
+ return '\n\n'.join(wrapped)
+
+
+def unwrap(s):
+ r"""
+ Given a multi-line string, return an unwrapped version.
+
+ >>> wrapped = wrap(lorem_ipsum)
+ >>> wrapped.count('\n')
+ 20
+ >>> unwrapped = unwrap(wrapped)
+ >>> unwrapped.count('\n')
+ 1
+ >>> print(unwrapped)
+ Lorem ipsum dolor sit amet, consectetur adipiscing ...
+ Curabitur pretium tincidunt lacus. Nulla gravida orci ...
+
+ """
+ paragraphs = re.split(r'\n\n+', s)
+ cleaned = (para.replace('\n', ' ') for para in paragraphs)
+ return '\n'.join(cleaned)
+
+
+
+
+class Splitter(object):
+ """object that will split a string with the given arguments for each call
+
+ >>> s = Splitter(',')
+ >>> s('hello, world, this is your, master calling')
+ ['hello', ' world', ' this is your', ' master calling']
+ """
+
+ def __init__(self, *args):
+ self.args = args
+
+ def __call__(self, s):
+ return s.split(*self.args)
+
+
+def indent(string, prefix=' ' * 4):
+ """
+ >>> indent('foo')
+ ' foo'
+ """
+ return prefix + string
+
+
+class WordSet(tuple):
+ """
+ Given an identifier, return the words that identifier represents,
+ whether in camel case, underscore-separated, etc.
+
+ >>> WordSet.parse("camelCase")
+ ('camel', 'Case')
+
+ >>> WordSet.parse("under_sep")
+ ('under', 'sep')
+
+ Acronyms should be retained
+
+ >>> WordSet.parse("firstSNL")
+ ('first', 'SNL')
+
+ >>> WordSet.parse("you_and_I")
+ ('you', 'and', 'I')
+
+ >>> WordSet.parse("A simple test")
+ ('A', 'simple', 'test')
+
+ Multiple caps should not interfere with the first cap of another word.
+
+ >>> WordSet.parse("myABCClass")
+ ('my', 'ABC', 'Class')
+
+ The result is a WordSet, so you can get the form you need.
+
+ >>> WordSet.parse("myABCClass").underscore_separated()
+ 'my_ABC_Class'
+
+ >>> WordSet.parse('a-command').camel_case()
+ 'ACommand'
+
+ >>> WordSet.parse('someIdentifier').lowered().space_separated()
+ 'some identifier'
+
+ Slices of the result should return another WordSet.
+
+ >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
+ 'out_of_context'
+
+ >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
+ 'word set'
+
+ >>> example = WordSet.parse('figured it out')
+ >>> example.headless_camel_case()
+ 'figuredItOut'
+ >>> example.dash_separated()
+ 'figured-it-out'
+
+ """
+
+ _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')
+
+ def capitalized(self):
+ return WordSet(word.capitalize() for word in self)
+
+ def lowered(self):
+ return WordSet(word.lower() for word in self)
+
+ def camel_case(self):
+ return ''.join(self.capitalized())
+
+ def headless_camel_case(self):
+ words = iter(self)
+ first = next(words).lower()
+ new_words = itertools.chain((first,), WordSet(words).camel_case())
+ return ''.join(new_words)
+
+ def underscore_separated(self):
+ return '_'.join(self)
+
+ def dash_separated(self):
+ return '-'.join(self)
+
+ def space_separated(self):
+ return ' '.join(self)
+
+ def trim_right(self, item):
+ """
+ Remove the item from the end of the set.
+
+ >>> WordSet.parse('foo bar').trim_right('foo')
+ ('foo', 'bar')
+ >>> WordSet.parse('foo bar').trim_right('bar')
+ ('foo',)
+ >>> WordSet.parse('').trim_right('bar')
+ ()
+ """
+ return self[:-1] if self and self[-1] == item else self
+
+ def trim_left(self, item):
+ """
+ Remove the item from the beginning of the set.
+
+ >>> WordSet.parse('foo bar').trim_left('foo')
+ ('bar',)
+ >>> WordSet.parse('foo bar').trim_left('bar')
+ ('foo', 'bar')
+ >>> WordSet.parse('').trim_left('bar')
+ ()
+ """
+ return self[1:] if self and self[0] == item else self
+
+ def trim(self, item):
+ """
+ >>> WordSet.parse('foo bar').trim('foo')
+ ('bar',)
+ """
+ return self.trim_left(item).trim_right(item)
+
+ def __getitem__(self, item):
+ result = super(WordSet, self).__getitem__(item)
+ if isinstance(item, slice):
+ result = WordSet(result)
+ return result
+
+ @classmethod
+ def parse(cls, identifier):
+ matches = cls._pattern.finditer(identifier)
+ return WordSet(match.group(0) for match in matches)
+
+ @classmethod
+ def from_class_name(cls, subject):
+ return cls.parse(subject.__class__.__name__)
+
+
+# for backward compatibility
+words = WordSet.parse
+
+
+def simple_html_strip(s):
+ r"""
+ Remove HTML from the string `s`.
+
+ >>> str(simple_html_strip(''))
+ ''
+
+ >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
+ A stormy day in paradise
+
+ >>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.'))
+ Somebody tell the truth.
+
+ >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
+ What about
+ multiple lines?
+ """
+ html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
+ texts = (match.group(3) or '' for match in html_stripper.finditer(s))
+ return ''.join(texts)
+
+
+class SeparatedValues(str):
+ """
+ A string separated by a separator. Overrides __iter__ for getting
+ the values.
+
+ >>> list(SeparatedValues('a,b,c'))
+ ['a', 'b', 'c']
+
+ Whitespace is stripped and empty values are discarded.
+
+ >>> list(SeparatedValues(' a, b , c, '))
+ ['a', 'b', 'c']
+ """
+
+ separator = ','
+
+ def __iter__(self):
+ parts = self.split(self.separator)
+ return filter(None, (part.strip() for part in parts))
+
+
+class Stripper:
+ r"""
+ Given a series of lines, find the common prefix and strip it from them.
+
+ >>> lines = [
+ ... 'abcdefg\n',
+ ... 'abc\n',
+ ... 'abcde\n',
+ ... ]
+ >>> res = Stripper.strip_prefix(lines)
+ >>> res.prefix
+ 'abc'
+ >>> list(res.lines)
+ ['defg\n', '\n', 'de\n']
+
+ If no prefix is common, nothing should be stripped.
+
+ >>> lines = [
+ ... 'abcd\n',
+ ... '1234\n',
+ ... ]
+ >>> res = Stripper.strip_prefix(lines)
+ >>> res.prefix = ''
+ >>> list(res.lines)
+ ['abcd\n', '1234\n']
+ """
+
+ def __init__(self, prefix, lines):
+ self.prefix = prefix
+ self.lines = map(self, lines)
+
+ @classmethod
+ def strip_prefix(cls, lines):
+ prefix_lines, lines = itertools.tee(lines)
+ prefix = functools.reduce(cls.common_prefix, prefix_lines)
+ return cls(prefix, lines)
+
+ def __call__(self, line):
+ if not self.prefix:
+ return line
+ null, prefix, rest = line.partition(self.prefix)
+ return rest
+
+ @staticmethod
+ def common_prefix(s1, s2):
+ """
+ Return the common prefix of two lines.
+ """
+ index = min(len(s1), len(s2))
+ while s1[:index] != s2[:index]:
+ index -= 1
+ return s1[:index]
+
+
+def remove_prefix(text, prefix):
+ """
+ Remove the prefix from the text if it exists.
+
+ >>> remove_prefix('underwhelming performance', 'underwhelming ')
+ 'performance'
+
+ >>> remove_prefix('something special', 'sample')
+ 'something special'
+ """
+ null, prefix, rest = text.rpartition(prefix)
+ return rest
+
+
+def remove_suffix(text, suffix):
+ """
+ Remove the suffix from the text if it exists.
+
+ >>> remove_suffix('name.git', '.git')
+ 'name'
+
+ >>> remove_suffix('something special', 'sample')
+ 'something special'
+ """
+ rest, suffix, null = text.partition(suffix)
+ return rest
+
+
+def normalize_newlines(text):
+ r"""
+ Replace alternate newlines with the canonical newline.
+
+ >>> normalize_newlines('Lorem Ipsum\u2029')
+ 'Lorem Ipsum\n'
+ >>> normalize_newlines('Lorem Ipsum\r\n')
+ 'Lorem Ipsum\n'
+ >>> normalize_newlines('Lorem Ipsum\x85')
+ 'Lorem Ipsum\n'
+ """
+ newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
+ pattern = '|'.join(newlines)
+ return re.sub(pattern, '\n', text)
+
+
+def _nonblank(str):
+ return str and not str.startswith('#')
+
+
+@functools.singledispatch
+def yield_lines(iterable):
+ r"""
+ Yield valid lines of a string or iterable.
+
+ >>> list(yield_lines(''))
+ []
+ >>> list(yield_lines(['foo', 'bar']))
+ ['foo', 'bar']
+ >>> list(yield_lines('foo\nbar'))
+ ['foo', 'bar']
+ >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
+ ['foo', 'baz #comment']
+ >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
+ ['foo', 'bar', 'baz', 'bing']
+ """
+ return itertools.chain.from_iterable(map(yield_lines, iterable))
+
+
+@yield_lines.register(str)
+def _(text):
+ return filter(_nonblank, map(str.strip, text.splitlines()))
+
+
+def drop_comment(line):
+ """
+ Drop comments.
+
+ >>> drop_comment('foo # bar')
+ 'foo'
+
+ A hash without a space may be in a URL.
+
+ >>> drop_comment('http://example.com/foo#bar')
+ 'http://example.com/foo#bar'
+ """
+ return line.partition(' #')[0]
+
+
+def join_continuation(lines):
+ r"""
+ Join lines continued by a trailing backslash.
+
+ >>> list(join_continuation(['foo \\', 'bar', 'baz']))
+ ['foobar', 'baz']
+ >>> list(join_continuation(['foo \\', 'bar', 'baz']))
+ ['foobar', 'baz']
+ >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
+ ['foobarbaz']
+
+ Not sure why, but...
+ The character preceeding the backslash is also elided.
+
+ >>> list(join_continuation(['goo\\', 'dly']))
+ ['godly']
+
+ A terrible idea, but...
+ If no line is available to continue, suppress the lines.
+
+ >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
+ ['foo']
+ """
+ lines = iter(lines)
+ for item in lines:
+ while item.endswith('\\'):
+ try:
+ item = item[:-2].strip() + next(lines)
+ except StopIteration:
+ return
+ yield item