diff --git a/lizard_languages/python.py b/lizard_languages/python.py
index 897a8a4b..2abdca65 100644
--- a/lizard_languages/python.py
+++ b/lizard_languages/python.py
@@ -4,6 +4,14 @@
 from .script_language import ScriptLanguageMixIn
 
 
+# Triple-quoted string patterns for the common tokenizer, so a whole
+# triple-quoted string is matched as a single token.
+_PY_TRIPLE_QUOTE = (
+    r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")"
+    r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')"
+)
+
+
 def count_spaces(token):
     return len(token.replace('\t', ' ' * 8))
 
@@ -59,13 +67,110 @@ def __init__(self, context):
         self._keyword_case = False  # set by _soft_keyword_lookahead: True when 'case' is a soft keyword
         self._keyword_match = False  # set by _soft_keyword_lookahead: True when 'match' is a soft keyword
 
+    # f-string prefixes (any case). rb/br are bytes and have no interpolation.
+    _FSTRING_PREFIXES = frozenset(('f', 'rf', 'fr'))
+
     @staticmethod
     def generate_tokens(source_code, addition='', token_class=None):
-        return ScriptLanguageMixIn.generate_common_tokens(
-            source_code,
-            r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")" +
-            r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')",
-            token_class)
+        """Tokenize Python source code, expanding f-string interpolations.
+
+        The common tokenizer runs with the triple-quoted string patterns, then
+        the {...} interpolations of f-strings are re-tokenized as code.
+        `addition` is accepted but not used by this implementation.
+        """
+        tokens = ScriptLanguageMixIn.generate_common_tokens(
+            source_code, _PY_TRIPLE_QUOTE, token_class)
+        return PythonReader._expand_fstring_interpolations(tokens, token_class)
+
+    @staticmethod
+    def _expand_fstring_interpolations(tokens, token_class):
+        """Re-tokenize the {...} interpolations of f-strings so control-flow
+        keywords inside them reach the condition counter (#317).
+
+        The tokenizer emits the f-string prefix ('f', 'rf', ...) as its own
+        token right before the string body, so an f-string is a prefix token
+        immediately followed by a string token.
+
+        Yields every input token in order, except that a string token
+        directly following an f-string prefix is replaced by its expansion.
+        """
+        prefix = None
+        for token in tokens:
+            if prefix is not None:
+                held, prefix = prefix, None
+                if isinstance(token, str) and token[:1] in ('"', "'"):
+                    yield held
+                    for inner in PythonReader._tokenize_fstring_body(
+                            token, token_class):
+                        yield inner
+                    continue
+                yield held
+                # not a string after all; fall through to classify this token
+            if isinstance(token, str) and \
+                    token.lower() in PythonReader._FSTRING_PREFIXES:
+                prefix = token
+                continue
+            yield token
+        if prefix is not None:
+            yield prefix
+
+    @staticmethod
+    def _tokenize_fstring_body(token, token_class):
+        """Split an f-string body into literal chunks (kept as string tokens)
+        and interpolation expressions (re-tokenized so inner keywords count).
+
+        `token` is the whole string token, quotes included; `token_class` is
+        passed through to generate_tokens for each interpolation.
+
+        '{{' and '}}' are literal braces, not interpolations. If the body has no
+        interpolation, the original token is yielded unchanged.
+
+        An interpolation whose closing brace is missing (incomplete source)
+        runs to the end of the body.
+        """
+        quote = token[:3] if token[:3] in ('"""', "'''") else token[:1]
+        body = token[len(quote):len(token) - len(quote)]
+        literal = []
+        produced = []
+        i, n = 0, len(body)
+        while i < n:
+            if body[i:i + 2] in ('{{', '}}'):
+                literal.append(body[i])
+                i += 2
+                continue
+            if body[i] == '{':
+                # Find the matching '}'. Quotes are not inspected, so a brace
+                # inside a nested string literal is counted like any other.
+                depth, j = 1, i + 1
+                while j < n and depth:
+                    if body[j] == '{':
+                        depth += 1
+                    elif body[j] == '}':
+                        depth -= 1
+                    j += 1
+                # j is one past the closing brace when it was found; when the
+                # interpolation is unterminated, j is the end of the body and
+                # there is no brace to strip.
+                end = j if depth else j - 1
+                if literal:
+                    produced.append(quote + ''.join(literal) + quote)
+                    literal = []
+                # The interpolation is Python code: tokenize it as such, which
+                # also expands any nested f-string (recursion terminates on the
+                # ever-shorter interpolation body).
+                produced.extend(PythonReader.generate_tokens(
+                    body[i + 1:end], '', token_class))
+                i = j
+                continue
+            literal.append(body[i])
+            i += 1
+        if not produced:
+            yield token  # no interpolation; leave the string token untouched
+            return
+        if literal:
+            produced.append(quote + ''.join(literal) + quote)
+        for tok in produced:
+            yield tok
 
     def process_token(self, token):
         """Process triple-quoted strings used as comments, and Python soft keywords.
diff --git a/test/test_languages/testPython.py b/test/test_languages/testPython.py
index 4fcad983..899b8bf8 100644
--- a/test/test_languages/testPython.py
+++ b/test/test_languages/testPython.py
@@ -892,5 +892,50 @@ def f(x):
         modified = self._get_funcs(code, NestDepth(), Modified())
         self.assertEqual(1, modified[0].cyclomatic_complexity)
 
+
+class Test_Python_fstring_complexity(unittest.TestCase):
+    """Conditions written inside f-string {...} interpolations are counted (#317)."""
+
+    @staticmethod
+    def _ccn(source):
+        """Return the cyclomatic complexity of the first function in source."""
+        return get_python_function_list(source)[0].cyclomatic_complexity
+
+    def test_fstring_comprehension_counts_for(self):
+        source = 'def f(items):\n return f"{\', \'.join([x for x in items])}"\n'
+        self.assertEqual(2, self._ccn(source))
+
+    def test_fstring_ternary_counts_if(self):
+        source = 'def f(cond):\n return f"{\'a\' if cond else \'b\'}"\n'
+        self.assertEqual(2, self._ccn(source))
+
+    def test_fstring_logical_operators_counted(self):
+        source = 'def f(a, b):\n return f"{a and b or a}"\n'
+        self.assertEqual(3, self._ccn(source))
+
+    def test_fstring_matches_non_fstring_equivalent(self):
+        with_fstring = self._ccn('def f(i):\n return f"{[x for x in i]}"\n')
+        plain = self._ccn('def f(i):\n return [x for x in i]\n')
+        self.assertEqual(plain, with_fstring)
+
+    def test_keyword_inside_nested_string_not_counted(self):
+        # 'if' sits in a string literal nested in the interpolation; 'or' is the only condition
+        source = 'def f(x):\n return f"{x or \'use this if empty\'}"\n'
+        self.assertEqual(2, self._ccn(source))
+
+    def test_escaped_braces_are_not_interpolation(self):
+        # doubled braces are literal text, so the 'if' between them is not code
+        source = 'def f():\n return f"{{ keep this if you can }}"\n'
+        self.assertEqual(1, self._ccn(source))
+
+    def test_plain_fstring_has_base_complexity(self):
+        source = 'def f(name):\n return f"hello {name}"\n'
+        self.assertEqual(1, self._ccn(source))
+
+    def test_nested_fstring_counts_inner_control_flow(self):
+        source = 'def f(items):\n return f"{f\'{[x for x in items]}\'}"\n'
+        self.assertEqual(2, self._ccn(source))
+
+
 def top_level_function_for_test():
     pass