Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 89 additions & 5 deletions lizard_languages/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
from .script_language import ScriptLanguageMixIn


# Triple-quoted string patterns for the common tokenizer, so a whole
# triple-quoted string is matched as a single token.
_PY_TRIPLE_QUOTE = (
r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")"
r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')"
)


def count_spaces(token):
    """Return the length of *token* with every tab counted as 8 characters.

    All characters are counted, not only whitespace; a tab simply weighs
    eight instead of one.
    """
    tab_count = token.count('\t')
    # Each tab stands for 8 columns, i.e. 7 more than the 1 len() gives it.
    return len(token) + 7 * tab_count

Expand Down Expand Up @@ -59,13 +67,89 @@ def __init__(self, context):
self._keyword_case = False # set by _soft_keyword_lookahead: True when 'case' is a soft keyword
self._keyword_match = False # set by _soft_keyword_lookahead: True when 'match' is a soft keyword

# f-string prefixes (any case). rb/br are bytes and have no interpolation.
_FSTRING_PREFIXES = frozenset(('f', 'rf', 'fr'))

@staticmethod
def generate_tokens(source_code, addition='', token_class=None):
    """Tokenize Python source and expand f-string interpolations.

    The source is first run through the shared tokenizer with the
    triple-quoted-string patterns added, then the resulting stream is passed
    through ``_expand_fstring_interpolations`` so code inside ``{...}`` of an
    f-string is emitted as ordinary tokens.

    Args:
        source_code: the Python source text to tokenize.
        addition: accepted for signature compatibility; this reader does
            not use it and always supplies its own extra patterns.
        token_class: forwarded unchanged to the common tokenizer and to the
            f-string expansion.

    Returns:
        A generator of tokens.
    """
    # Single code path: the earlier direct return of the common tokenizer's
    # output (with the regex spelled inline) is superseded by this one.
    tokens = ScriptLanguageMixIn.generate_common_tokens(
        source_code, _PY_TRIPLE_QUOTE, token_class)
    return PythonReader._expand_fstring_interpolations(tokens, token_class)

@staticmethod
def _expand_fstring_interpolations(tokens, token_class):
    """Yield *tokens* with every f-string body split into its parts.

    An f-string arrives as two adjacent tokens: the prefix ('f', 'rf', ...)
    on its own, immediately followed by the quoted string. When that pair is
    seen, the prefix is passed through and the string is replaced by the
    output of ``_tokenize_fstring_body`` so control-flow keywords inside
    ``{...}`` reach the condition counter (#317). Every other token is
    passed through untouched, as are tokens that are not plain ``str``.
    """
    string_openers = ('"', "'")
    pending = None  # a prefix-looking token waiting to see what follows it
    for current in tokens:
        is_text = isinstance(current, str)
        if pending is not None:
            # The held token is emitted first whatever comes next.
            yield pending
            pending = None
            if is_text and current.startswith(string_openers):
                pieces = PythonReader._tokenize_fstring_body(
                    current, token_class)
                for piece in pieces:
                    yield piece
                continue
            # Not followed by a string: classify the current token normally.
        if is_text and current.lower() in PythonReader._FSTRING_PREFIXES:
            pending = current
        else:
            yield current
    # The stream ended right after a prefix-looking token; do not lose it.
    if pending is not None:
        yield pending

@staticmethod
def _tokenize_fstring_body(token, token_class):
    """Split an f-string token into literal chunks and interpolation code.

    Literal text between interpolations is re-emitted as string tokens
    wrapped in the original quote style; each ``{...}`` interpolation is
    tokenized as Python code via ``generate_tokens`` (which also expands a
    nested f-string), so keywords inside it are visible to later stages.

    '{{' and '}}' are literal braces, not interpolations. If the body has no
    interpolation, the original token is yielded unchanged.

    Args:
        token: the quoted string token (single- or triple-quoted) that
            followed an f-string prefix.
        token_class: forwarded to ``generate_tokens`` for the interpolations.

    Yields:
        The replacement tokens, in source order.
    """
    quote = token[:3] if token[:3] in ('"""', "'''") else token[:1]
    body = token[len(quote):len(token) - len(quote)]
    literal = []
    produced = []
    i, n = 0, len(body)
    while i < n:
        if body[i:i + 2] in ('{{', '}}'):
            # Doubled brace: one literal brace character.
            literal.append(body[i])
            i += 2
            continue
        if body[i] == '{':
            expr_end, resume = PythonReader._scan_interpolation(body, i + 1)
            if literal:
                # Flush the text that preceded this interpolation.
                produced.append(quote + ''.join(literal) + quote)
                literal = []
            # The interpolation is Python code: tokenize it as such.
            # Recursion terminates because the expression is always shorter
            # than the token it came from.
            produced.extend(PythonReader.generate_tokens(
                body[i + 1:expr_end], '', token_class))
            i = resume
            continue
        literal.append(body[i])
        i += 1
    if not produced:
        yield token  # no interpolation; leave the string token untouched
        return
    if literal:
        produced.append(quote + ''.join(literal) + quote)
    for tok in produced:
        yield tok

@staticmethod
def _scan_interpolation(body, start):
    """Find where the interpolation whose expression begins at *start* ends.

    Braces are balanced by depth, but braces inside a nested string literal
    are ignored so that e.g. ``{'{' if a else b}`` closes at the right place.
    A quote character with no matching partner (such as a fill character in
    a format spec) is treated as an ordinary character.

    Returns:
        ``(expr_end, resume)``: ``body[start:expr_end]`` is the expression
        and ``resume`` is the index at which scanning of the body continues.
        When the closing brace is missing, both are ``len(body)`` so the
        whole remainder is kept as the expression.
    """
    depth, j, n = 1, start, len(body)
    while j < n:
        char = body[j]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return j, j + 1
        elif char in ('"', "'"):
            # Look for the matching quote, stepping over backslash escapes.
            k = j + 1
            while k < n and body[k] != char:
                k += 2 if body[k] == '\\' else 1
            if k < n:
                j = k  # land on the closing quote; stepped past below
        j += 1
    return n, n

def process_token(self, token):
"""Process triple-quoted strings used as comments, and Python soft keywords.
Expand Down
50 changes: 50 additions & 0 deletions test/test_languages/testPython.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,5 +892,55 @@ def f(x):
modified = self._get_funcs(code, NestDepth(), Modified())
self.assertEqual(1, modified[0].cyclomatic_complexity)


class Test_Python_fstring_complexity(unittest.TestCase):
    """Cyclomatic complexity must include control flow written inside the
    {...} interpolations of an f-string (#317)."""

    def _first_complexity(self, code):
        """Analyze *code* and return the complexity of its first function."""
        return get_python_function_list(code)[0].cyclomatic_complexity

    def test_fstring_comprehension_counts_for(self):
        # the comprehension's 'for' adds one to the base complexity
        code = 'def f(items):\n return f"{\', \'.join([x for x in items])}"\n'
        self.assertEqual(2, self._first_complexity(code))

    def test_fstring_ternary_counts_if(self):
        # the conditional expression's 'if' adds one
        code = 'def f(cond):\n return f"{\'a\' if cond else \'b\'}"\n'
        self.assertEqual(2, self._first_complexity(code))

    def test_fstring_logical_operators_counted(self):
        # 'and' and 'or' each add one
        code = 'def f(a, b):\n return f"{a and b or a}"\n'
        self.assertEqual(3, self._first_complexity(code))

    def test_fstring_matches_non_fstring_equivalent(self):
        # the same expression must score the same in and out of an f-string
        inside_fstring = self._first_complexity(
            'def f(i):\n return f"{[x for x in i]}"\n')
        plain = self._first_complexity(
            'def f(i):\n return [x for x in i]\n')
        self.assertEqual(plain, inside_fstring)

    def test_keyword_inside_nested_string_not_counted(self):
        # the 'if' lives inside a nested string literal; only 'or' is a real condition
        code = 'def f(x):\n return f"{x or \'use this if empty\'}"\n'
        self.assertEqual(2, self._first_complexity(code))

    def test_escaped_braces_are_not_interpolation(self):
        # {{ and }} are literal braces, so the 'if' is plain text, not a condition
        code = 'def f():\n return f"{{ keep this if you can }}"\n'
        self.assertEqual(1, self._first_complexity(code))

    def test_plain_fstring_has_base_complexity(self):
        # a bare name in the interpolation adds nothing
        code = 'def f(name):\n return f"hello {name}"\n'
        self.assertEqual(1, self._first_complexity(code))

    def test_nested_fstring_counts_inner_control_flow(self):
        # the 'for' sits in an f-string nested inside another f-string
        code = 'def f(items):\n return f"{f\'{[x for x in items]}\'}"\n'
        self.assertEqual(2, self._first_complexity(code))


# Intentionally empty module-level function.
# NOTE(review): presumably a fixture inspected by a test elsewhere in this
# file — confirm before changing its name or body.
def top_level_function_for_test():
    pass
Loading