yapf/yapflib/unwrapped_line.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""UnwrappedLine primitive for formatting.

An unwrapped line is the containing data structure produced by the parser. It
collects all nodes (stored in FormatToken objects) that could appear on a
single line if there were no line length restrictions. It's then used by the
parser to perform the wrapping required to comply with the style guide.
"""

from yapf.yapflib import format_token
from yapf.yapflib import py3compat
from yapf.yapflib import pytree_utils
from yapf.yapflib import split_penalty
from yapf.yapflib import style
from yapf.yapflib import subtypes

from lib2to3.fixer_util import syms as python_symbols


class UnwrappedLine(object):
  """Represents a single unwrapped line in the output.

  Attributes:
    depth: indentation depth of this line. This is just a numeric value used to
      distinguish lines that are more deeply nested than others. It is not the
      actual amount of spaces, which is style-dependent.
    disable: flag, initially False. When set, Split() returns the line
      unchanged, and the flag is forwarded to the spacing computation done by
      CalculateFormattingInformation().
  """

  def __init__(self, depth, tokens=None):
    """Constructor.

    Creates a new unwrapped line with the given depth and initial list of
    tokens. Constructs the doubly-linked lists for format tokens using their
    built-in next_token and previous_token attributes.

    Arguments:
      depth: indentation depth of this line
      tokens: initial list of tokens
    """
    self.depth = depth
    self._tokens = tokens or []
    self.disable = False

    if self._tokens:
      # Set up a doubly linked list.
      for index, tok in enumerate(self._tokens[1:]):
        # Note, 'index' is the index to the previous token.
        tok.previous_token = self._tokens[index]
        self._tokens[index].next_token = tok

  def CalculateFormattingInformation(self):
    """Calculate the split penalty and total length for the tokens.

    Walks the tokens from left to right and fills in, for every token after
    the first, its spaces_required_before, total_length, split_penalty,
    must_break_before and can_break_before attributes. The line must contain
    at least one token.
    """
    # Say that the first token in the line should have a space before it. This
    # means only that if this unwrapped line is joined with a predecessor line,
    # then there will be a space between them.
    self.first.spaces_required_before = 1
    self.first.total_length = len(self.first.value)

    prev_token = self.first
    prev_length = self.first.total_length
    for token in self._tokens[1:]:
      if (token.spaces_required_before == 0 and
          _SpaceRequiredBetween(prev_token, token, self.disable)):
        token.spaces_required_before = 1

      # Pseudo tokens do not contribute to the length of the line.
      tok_len = len(token.value) if not token.is_pseudo else 0

      spaces_required_before = token.spaces_required_before
      if isinstance(spaces_required_before, list):
        assert token.is_comment, token

        # If here, we are looking at a comment token that appears on a line
        # with other tokens (but because it is a comment, it is always the last
        # token).  Rather than specifying the actual number of spaces here,
        # hard code a value of 0 and then set it later. This logic only works
        # because this comment token is guaranteed to be the last token in the
        # list.
        spaces_required_before = 0

      token.total_length = prev_length + tok_len + spaces_required_before

      # The split penalty has to be computed before {must|can}_break_before,
      # because these may use it for their decision.
      token.split_penalty += _SplitPenalty(prev_token, token)
      token.must_break_before = _MustBreakBefore(prev_token, token)
      token.can_break_before = (
          token.must_break_before or _CanBreakBefore(prev_token, token))

      prev_length = token.total_length
      prev_token = token

  def Split(self):
    """Split the line at semicolons.

    The semicolon tokens themselves are dropped. Segments that would contain
    no tokens (a leading semicolon, or several semicolons in a row) are
    skipped instead of producing an empty line.

    Returns:
      A list of UnwrappedLine objects, all at this line's depth. The list is
      [self] when the line has no semicolon or is disabled, and it is empty
      when the line consists of semicolons only.
    """
    if not self.has_semicolon or self.disable:
      return [self]

    uwlines = []
    uwline = UnwrappedLine(self.depth)
    for tok in self._tokens:
      if tok.value == ';':
        # Only close the current segment if it actually holds tokens; an
        # empty segment has no first/last token to unlink below.
        if uwline.tokens:
          uwlines.append(uwline)
          uwline = UnwrappedLine(self.depth)
      else:
        uwline.AppendToken(tok)

    if uwline.tokens:
      uwlines.append(uwline)

    # The boundary tokens still point at the neighbours they had in the
    # original line; cut those links so every new line is self-contained.
    for uwline in uwlines:
      uwline.first.previous_token = None
      uwline.last.next_token = None

    return uwlines

  ############################################################################
  # Token Access and Manipulation Methods                                    #
  ############################################################################

  def AppendToken(self, token):
    """Append a new FormatToken to the tokens contained in this line.

    The token is linked to the current last token (if any) through the
    previous_token / next_token attributes.

    Arguments:
      token: the token to append
    """
    if self._tokens:
      token.previous_token = self.last
      self.last.next_token = token
    self._tokens.append(token)

  def AppendNode(self, node):
    """Convenience method to append a pytree node directly.

    Wraps the node with a FormatToken.

    Arguments:
      node: the node to append
    """
    self.AppendToken(format_token.FormatToken(node))

  @property
  def first(self):
    """Returns the first token of the line (the line must not be empty)."""
    return self._tokens[0]

  @property
  def last(self):
    """Returns the last token of the line (the line must not be empty)."""
    return self._tokens[-1]

  ############################################################################
  # Token -> String Methods                                                  #
  ############################################################################

  def AsCode(self, indent_per_depth=2):
    """Return a "code" representation of this line.

    The code representation shows how the line would be printed out as code.

    TODO(eliben): for now this is rudimentary for debugging - once we add
    formatting capabilities, this method will have other uses (not all tokens
    have spaces around them, for example).

    Arguments:
      indent_per_depth: how many spaces to indent per depth level.

    Returns:
      A string representing the line as code: the indentation followed by the
      token values joined with single spaces.
    """
    indent = ' ' * indent_per_depth * self.depth
    tokens_str = ' '.join(tok.value for tok in self._tokens)
    return indent + tokens_str

  def __str__(self):  # pragma: no cover
    return self.AsCode()

  def __repr__(self):  # pragma: no cover
    tokens_repr = ','.join(
        '{0}({1!r})'.format(tok.name, tok.value) for tok in self._tokens)
    return 'UnwrappedLine(depth={0}, tokens=[{1}])'.format(
        self.depth, tokens_repr)

  ############################################################################
  # Properties                                                               #
  ############################################################################

  @property
  def tokens(self):
    """Access the tokens contained within this line.

    The caller must not modify the tokens list returned by this method.

    Returns:
      List of tokens in this line.
    """
    return self._tokens

  @property
  def lineno(self):
    """Return the line number of this unwrapped line.

    Returns:
      The line number of the first token in this unwrapped line.
    """
    return self.first.lineno

  @property
  def start(self):
    """The start of the logical line.

    Returns:
      A tuple of the starting line number and column.
    """
    return (self.first.lineno, self.first.column)

  @property
  def end(self):
    """The end of the logical line.

    Returns:
      A tuple of the ending line number and column, where the column is the
      last token's column plus the length of its value.
    """
    return (self.last.lineno, self.last.column + len(self.last.value))

  @property
  def is_comment(self):
    """Whether the first token of the line is a comment."""
    return self.first.is_comment

  @property
  def has_semicolon(self):
    """Whether any token of the line is a semicolon."""
    return any(tok.value == ';' for tok in self._tokens)


def _IsIdNumberStringToken(tok):
  """Whether tok is a keyword, a name, a number or a string token."""
  is_word = tok.is_keyword or tok.is_name
  return is_word or tok.is_number or tok.is_string


def _IsUnaryOperator(tok):
  """Whether tok carries the UNARY_OPERATOR subtype."""
  unary_subtype = subtypes.UNARY_OPERATOR
  return unary_subtype in tok.subtypes


def _HasPrecedence(tok):
  """Whether a binary operation has precedence within its context.

  Arguments:
    tok: the operator token. Its 'node' attribute must have a parent, because
      the search starts at tok.node.parent.parent.

  Returns:
    True if an enclosing "arith_expr" or "term" node is reached by walking up
    through "atom" nodes only; False if any other kind of node is met first or
    if the root of the tree is passed without finding one.
  """
  node = tok.node

  # We let ancestor be the statement surrounding the operation that tok is the
  # operator in.
  ancestor = node.parent.parent

  while ancestor is not None:
    # Search through the ancestor nodes in the parse tree for operators with
    # lower precedence.
    predecessor_type = pytree_utils.NodeName(ancestor)
    if predecessor_type in ('arith_expr', 'term'):
      # An ancestor "arith_expr" or "term" means we have found an operator
      # with lower precedence than our tok.
      return True
    if predecessor_type != 'atom':
      # We understand the context to look for precedence within as an
      # arbitrary nesting of "arith_expr", "term", and "atom" nodes. If we
      # leave this context we have not found a lower precedence operator.
      return False
    ancestor = ancestor.parent

  # The walk ran past the root of the tree without meeting a lower precedence
  # operator. Answer with an explicit boolean like every other path does.
  return False


def _PriorityIndicatingNoSpace(tok):
  """Whether to remove spaces around an operator due to precedence."""
  # Space removal is limited to arithmetic operators in simple expressions;
  # for those, the answer comes from the precedence check.
  if tok.is_arithmetic_op and tok.is_simple_expr:
    return _HasPrecedence(tok)
  return False


def _IsSubscriptColonAndValuePair(token1, token2):
  """Whether token1 is a number or name and token2 is a subscript colon."""
  is_value = token1.is_number or token1.is_name
  return is_value and token2.is_subscript_colon


def _SpaceRequiredBetween(left, right, is_line_disabled):
  """Return True if a space is required between the left and right token.

  The rules below are tried in order and the first one that applies decides
  the result, so their relative order matters.

  Arguments:
    left: the token on the left-hand side of the potential space
    right: the token on the right-hand side of the potential space
    is_line_disabled: whether the line holding the tokens is disabled; when
      true, the SPACES_AROUND_{DICT,LIST,TUPLE}_DELIMITERS rules are skipped

  Returns:
    A truthy value if a space is required, a falsy one otherwise. Several
    rules return the value of a style setting directly.
  """
  lval = left.value
  rval = right.value
  if (left.is_pseudo and _IsIdNumberStringToken(right) and
      left.previous_token and _IsIdNumberStringToken(left.previous_token)):
    # Space between keyword... tokens and pseudo parens.
    return True
  if left.is_pseudo or right.is_pseudo:
    # There should be a space after the ':' in a dictionary.
    if left.OpensScope():
      return True
    # The closing pseudo-paren shouldn't affect spacing.
    return False
  if left.is_continuation or right.is_continuation:
    # The continuation node's value has all of the spaces it needs.
    return False
  if right.name in pytree_utils.NONSEMANTIC_TOKENS:
    # No space before a non-semantic token.
    return False
  if _IsIdNumberStringToken(left) and _IsIdNumberStringToken(right):
    # Spaces between keyword, string, number, and identifier tokens.
    return True
  if lval == ',' and rval == ':':
    # We do want a space between a comma and colon.
    return True
  if style.Get('SPACE_INSIDE_BRACKETS'):
    # Supersede the "no space before a colon or comma" check.
    if lval in pytree_utils.OPENING_BRACKETS and rval == ':':
      return True
    if rval in pytree_utils.CLOSING_BRACKETS and lval == ':':
      return True
  if (style.Get('SPACES_AROUND_SUBSCRIPT_COLON') and
      (_IsSubscriptColonAndValuePair(left, right) or
       _IsSubscriptColonAndValuePair(right, left))):
    # Supersede the "never want a space before a colon or comma" check.
    return True
  if rval in ':,':
    # Otherwise, we never want a space before a colon or comma.
    return False
  if lval == ',' and rval in ']})':
    # Add a space between ending ',' and closing bracket if requested.
    return style.Get('SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET')
  if lval == ',':
    # We want a space after a comma.
    return True
  if lval == 'from' and rval == '.':
    # Space before the '.' in an import statement.
    return True
  if lval == '.' and rval == 'import':
    # Space after the '.' in an import statement.
    return True
  if (lval == '=' and rval in {'.', '...'} and
      subtypes.DEFAULT_OR_NAMED_ASSIGN not in left.subtypes):
    # Space between equal and the ellipsis, as in "X = ...". The ellipsis may
    # show up as one '...' token or start with a single '.' token.
    return True
  if lval == ':' and rval in {'.', '...'}:
    # Space between : and ...
    return True
  if ((right.is_keyword or right.is_name) and
      (left.is_keyword or left.is_name)):
    # Don't merge two keywords/identifiers.
    return True
  if (subtypes.SUBSCRIPT_COLON in left.subtypes or
      subtypes.SUBSCRIPT_COLON in right.subtypes):
    # A subscript shouldn't have spaces separating its colons.
    return False
  if (subtypes.TYPED_NAME in left.subtypes or
      subtypes.TYPED_NAME in right.subtypes):
    # A typed argument should have a space after the colon.
    return True
  if left.is_string:
    if (rval == '=' and
        subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in right.subtypes):
      # If there is a type hint, then we don't want to add a space between the
      # equal sign and the hint.
      return False
    if rval not in '[)]}.' and not right.is_binary_op:
      # A string followed by something other than a subscript, closing bracket,
      # dot, or a binary op should have a space after it.
      return True
    if rval in pytree_utils.CLOSING_BRACKETS:
      # A string followed by closing brackets should have a space after it
      # depending on SPACE_INSIDE_BRACKETS.  A string followed by opening
      # brackets, however, should not.
      return style.Get('SPACE_INSIDE_BRACKETS')
    if subtypes.SUBSCRIPT_BRACKET in right.subtypes:
      # It's legal to do this in Python: 'hello'[a]
      return False
  if left.is_binary_op and lval != '**' and _IsUnaryOperator(right):
    # Space between the binary operator and the unary operator.
    return True
  if left.is_keyword and _IsUnaryOperator(right):
    # Handle things like "not -3 < x".
    return True
  if _IsUnaryOperator(left) and _IsUnaryOperator(right):
    # No space between two unary operators.
    return False
  if left.is_binary_op or right.is_binary_op:
    if lval == '**' or rval == '**':
      # Space around the "power" operator.
      return style.Get('SPACES_AROUND_POWER_OPERATOR')
    # Enforce spaces around binary operators except the blocked ones.
    block_list = style.Get('NO_SPACES_AROUND_SELECTED_BINARY_OPERATORS')
    if lval in block_list or rval in block_list:
      return False
    if style.Get('ARITHMETIC_PRECEDENCE_INDICATION'):
      # Drop the spaces around the operator when precedence should be shown.
      if _PriorityIndicatingNoSpace(left) or _PriorityIndicatingNoSpace(right):
        return False
    return True
  if (_IsUnaryOperator(left) and lval != 'not' and
      (right.is_name or right.is_number or rval == '(')):
    # The previous token was a unary op. No space is desired between it and
    # the current token.
    return False
  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in left.subtypes and
      subtypes.TYPED_NAME not in right.subtypes):
    # A named argument or default parameter shouldn't have spaces around it.
    return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in right.subtypes and
      subtypes.TYPED_NAME not in left.subtypes):
    # A named argument or default parameter shouldn't have spaces around it.
    return style.Get('SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN')
  if (subtypes.VARARGS_LIST in left.subtypes or
      subtypes.VARARGS_LIST in right.subtypes):
    return False
  if (subtypes.VARARGS_STAR in left.subtypes or
      subtypes.KWARGS_STAR_STAR in left.subtypes):
    # Don't add a space after a vararg's star or a keyword's star-star.
    return False
  if lval == '@' and subtypes.DECORATOR in left.subtypes:
    # Decorators shouldn't be separated from the 'at' sign.
    return False
  if left.is_keyword and rval == '.':
    # Add space between keywords and dots.
    return lval not in {'None', 'print'}
  if lval == '.' and right.is_keyword:
    # Add space between keywords and dots.
    return rval not in {'None', 'print'}
  if lval == '.' or rval == '.':
    # Don't place spaces between dots.
    return False
  if ((lval == '(' and rval == ')') or (lval == '[' and rval == ']') or
      (lval == '{' and rval == '}')):
    # Empty objects shouldn't be separated by spaces.
    return False
  if not is_line_disabled and (left.OpensScope() or right.ClosesScope()):
    if (style.GetOrDefault('SPACES_AROUND_DICT_DELIMITERS', False) and (
        (lval == '{' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == '}' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
    if (style.GetOrDefault('SPACES_AROUND_LIST_DELIMITERS', False) and (
        (lval == '[' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == ']' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
    if (style.GetOrDefault('SPACES_AROUND_TUPLE_DELIMITERS', False) and (
        (lval == '(' and _IsDictListTupleDelimiterTok(left, is_opening=True)) or
        (rval == ')' and
         _IsDictListTupleDelimiterTok(right, is_opening=False)))):
      return True
  if (lval in pytree_utils.OPENING_BRACKETS and
      rval in pytree_utils.OPENING_BRACKETS):
    # Nested objects' opening brackets shouldn't be separated, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if (lval in pytree_utils.CLOSING_BRACKETS and
      rval in pytree_utils.CLOSING_BRACKETS):
    # Nested objects' closing brackets shouldn't be separated, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if lval in pytree_utils.CLOSING_BRACKETS and rval in '([':
    # A call, set, dictionary, or subscript that has a call or subscript after
    # it shouldn't have a space between them.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsIdNumberStringToken(right):
    # Don't separate the opening bracket from the first item, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if left.is_name and rval in '([':
    # Don't separate a call or array access from the name.
    return False
  if rval in pytree_utils.CLOSING_BRACKETS:
    # Don't separate the closing bracket from the last item, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    # FIXME(morbo): This might be too permissive.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if lval == 'print' and rval == '(':
    # Special support for the 'print' function.
    return False
  if lval in pytree_utils.OPENING_BRACKETS and _IsUnaryOperator(right):
    # Don't separate a unary operator from the opening bracket, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if (lval in pytree_utils.OPENING_BRACKETS and
      (subtypes.VARARGS_STAR in right.subtypes or
       subtypes.KWARGS_STAR_STAR in right.subtypes)):
    # Don't separate a '*' or '**' from the opening bracket, unless enabled
    # by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  if rval == ';':
    # Avoid spaces before a semicolon. (Why is there a semicolon?!)
    return False
  if lval == '(' and rval == 'await':
    # Special support for the 'await' keyword. Don't separate the 'await'
    # keyword from an opening paren, unless enabled by SPACE_INSIDE_BRACKETS.
    return style.Get('SPACE_INSIDE_BRACKETS')
  return True


def _MustBreakBefore(prev_token, cur_token):
  """Tell whether the line has to be broken right before cur_token.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token in front of which the break would go

  Returns:
    True when a break is forced by a preceding comment or by two adjacent
    strings inside brackets; otherwise cur_token's own must_break_before.
  """
  # Nothing may follow a comment on the same line.
  if prev_token.is_comment:
    return True

  # Same thing when only a pseudo token sits between the comment and us.
  before_prev = prev_token.previous_token
  if before_prev and prev_token.is_pseudo and before_prev.is_comment:
    return True

  # Consecutive strings inside brackets go on separate lines: had the author
  # wanted them together, they would be one string or joined with a '+'.
  both_strings = cur_token.is_string and prev_token.is_string
  if both_strings and IsSurroundedByBrackets(cur_token):
    return True

  return cur_token.must_break_before


def _CanBreakBefore(prev_token, cur_token):
  """Tell whether the line is allowed to wrap right before cur_token.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token in front of which the break would go

  Returns:
    False if any of the rules below forbids the break, True otherwise.
  """
  prev_val = prev_token.value
  cur_val = cur_token.value

  if py3compat.PY3:
    # Keep "yield from" together.
    if prev_val == 'yield' and cur_val == 'from':
      return False
    # Keep "async def", "async with", "async for" (and the "await" forms)
    # together.
    if prev_val in {'async', 'await'} and cur_val in {'def', 'with', 'for'}:
      return False

  # The penalty already marks this position as unbreakable.
  if cur_token.split_penalty >= split_penalty.UNBREAKABLE:
    return False

  # Never right after the '@' that starts a decorator.
  if prev_val == '@':
    return False

  # Never before a colon (start of a block of code) or before a comma.
  if cur_val in (':', ','):
    return False

  # Never between a name and the '(' of a call/definition or the '[' of a
  # subscript.
  if prev_token.is_name and cur_val in ('(', '['):
    return False

  # A comment that ends the same source line stays on it.
  if cur_token.is_comment and prev_token.lineno == cur_token.lineno:
    return False

  # Never after a unary operator.
  if subtypes.UNARY_OPERATOR in prev_token.subtypes:
    return False

  if style.Get('ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS'):
    return True

  # The style forbids splitting on either side of a default or named assign.
  assign = subtypes.DEFAULT_OR_NAMED_ASSIGN
  return not (assign in cur_token.subtypes or assign in prev_token.subtypes)


def IsSurroundedByBrackets(tok):
  """Find the bracket that encloses the token, if there is one.

  Walks backwards over the tokens in front of tok while keeping one nesting
  counter per bracket kind.

  Arguments:
    tok: the token to inspect

  Returns:
    The token of the first opening bracket found (going backwards) whose
    closing counterpart has not been passed yet, or None if there is none.
  """
  opener_for = {')': '(', '}': '{', ']': '['}
  # Goes negative for every closing bracket passed, so that the matching
  # opening bracket is recognised as an already completed pair.
  balance = {'(': 0, '{': 0, '[': 0}

  cursor = tok.previous_token
  while cursor:
    value = cursor.value
    if value in opener_for:
      balance[opener_for[value]] -= 1
    elif value in balance:
      if balance[value] == 0:
        return cursor
      balance[value] += 1
    cursor = cursor.previous_token
  return None


def _IsDictListTupleDelimiterTok(tok, is_opening):
  """Whether the bracket token delimits a non-empty dict/set, list or tuple.

  Arguments:
    tok: the bracket token to check
    is_opening: true if tok is the opening bracket of its pair, false if it is
      the closing one

  Returns:
    False if tok has no matching bracket or if nothing sits between the two
    brackets. Otherwise, whether the parent node of the first token after the
    opening bracket is a dictsetmaker, listmaker or testlist_gexp.
  """
  assert tok

  partner = tok.matching_bracket
  if partner is None:
    return False

  open_tok, close_tok = (tok, partner) if is_opening else (partner, tok)

  # There must be something in between the tokens
  first_inner = open_tok.next_token
  if first_inner == close_tok:
    return False

  assert first_inner.node
  assert first_inner.node.parent

  container_types = [
      python_symbols.dictsetmaker,
      python_symbols.listmaker,
      python_symbols.testlist_gexp,
  ]
  return first_inner.node.parent.type in container_types


# Operator spellings, grouped by kind.
_LOGICAL_OPERATORS = frozenset(('and', 'or'))
_BITWISE_OPERATORS = frozenset(('&', '|', '^'))
_ARITHMETIC_OPERATORS = frozenset(('+', '-', '*', '/', '%', '//', '@'))


def _SplitPenalty(prev_token, cur_token):
  """Return the penalty for breaking the line before the current token.

  The rules are tried in order; the first one that applies supplies the
  penalty.

  Arguments:
    prev_token: the token preceding cur_token
    cur_token: the token in front of which the break would go

  Returns:
    The penalty for splitting between prev_token and cur_token.
  """
  prev_val = prev_token.value
  cur_val = cur_token.value

  # Never break right after 'not'.
  if prev_val == 'not':
    return split_penalty.UNBREAKABLE

  # A positive penalty already attached to the token takes over from here.
  if cur_token.node_split_penalty > 0:
    return cur_token.node_split_penalty

  # 'and' / 'or': the preferred side of the operator is free, the other side
  # costs SPLIT_PENALTY_LOGICAL_OPERATOR.
  before_logical = style.Get('SPLIT_BEFORE_LOGICAL_OPERATOR')
  if prev_val in _LOGICAL_OPERATORS:
    if before_logical:
      return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')
    return 0
  if cur_val in _LOGICAL_OPERATORS:
    if before_logical:
      return 0
    return style.Get('SPLIT_PENALTY_LOGICAL_OPERATOR')

  # '&', '|' and '^': same scheme with SPLIT_PENALTY_BITWISE_OPERATOR.
  before_bitwise = style.Get('SPLIT_BEFORE_BITWISE_OPERATOR')
  if prev_val in _BITWISE_OPERATORS:
    if before_bitwise:
      return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')
    return 0
  if cur_val in _BITWISE_OPERATORS:
    if before_bitwise:
      return 0
    return style.Get('SPLIT_PENALTY_BITWISE_OPERATOR')

  cur_subtypes = cur_token.subtypes
  if subtypes.COMP_FOR in cur_subtypes or subtypes.COMP_IF in cur_subtypes:
    # We don't mind breaking before the 'for' or 'if' of a list comprehension.
    return 0

  if subtypes.UNARY_OPERATOR in prev_token.subtypes:
    # Try not to break after a unary operator.
    return style.Get('SPLIT_PENALTY_AFTER_UNARY_OPERATOR')

  if prev_val == ',':
    # Breaking after a comma is fine, if need be.
    return 0

  if '**' in (prev_val, cur_val):
    return split_penalty.STRONGLY_CONNECTED

  if (subtypes.VARARGS_STAR in prev_token.subtypes or
      subtypes.KWARGS_STAR_STAR in prev_token.subtypes):
    # Don't split after a varargs * or kwargs **.
    return split_penalty.UNBREAKABLE

  if prev_token.OpensScope() and cur_val != '(':
    # Penalty for splitting right after an opening bracket.
    return style.Get('SPLIT_PENALTY_AFTER_OPENING_BRACKET')

  if cur_val in (':', '='):
    # Don't split before a colon or before an assignment.
    return split_penalty.UNBREAKABLE

  if (subtypes.DEFAULT_OR_NAMED_ASSIGN in prev_token.subtypes or
      subtypes.DEFAULT_OR_NAMED_ASSIGN in cur_token.subtypes):
    # Don't break before or after a default or named assignment.
    return split_penalty.UNBREAKABLE

  if cur_val == '==':
    # We would rather not split before an equality operator.
    return split_penalty.STRONGLY_CONNECTED

  # Give a slight penalty for splitting before the closing scope.
  return 100 if cur_token.ClosesScope() else 0