diff --git a/.github/workflows/pr-validation.yml b/.github/workflows/pr-validation.yml index 2c6bdf0..6d73bba 100644 --- a/.github/workflows/pr-validation.yml +++ b/.github/workflows/pr-validation.yml @@ -9,7 +9,7 @@ on: types: [opened, synchronize, reopened] jobs: - test-and-lint: + lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -19,11 +19,10 @@ jobs: with: python-version: '3.13' - - name: Install dependencies + - name: Install linting tools run: | python -m pip install --upgrade pip - pip install pytest ruff black - pip install -e . || pip install . + pip install ruff black - name: Format check with black run: | @@ -33,10 +32,6 @@ jobs: run: | ruff check . || echo "Linting issues found (non-blocking)" - - name: Run tests - run: | - pytest tests/ -v --tb=short - documentation: runs-on: ubuntu-latest steps: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0f8a200..7a380f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] - python-version: ["3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -32,6 +32,11 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev]" || pip install ".[dev]" + - name: Install multi-language support (Python < 3.13) + if: matrix.python-version != '3.13' + run: | + pip install -e ".[multilang]" || pip install ".[multilang]" + - name: Run tests run: | pytest tests/ -v --tb=short diff --git a/CHANGELOG.md b/CHANGELOG.md index 77d0a83..dd69855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,66 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.2.1] - 2025-12-08 + +### Added +- **Advanced Code Complexity Metrics** + - Halstead complexity metrics (vocabulary, length, volume, difficulty, effort, time, bugs) + - Multi-language support for Halstead analysis via tree-sitter + - 
Python, JavaScript, TypeScript, Java, C, C++, C#, Go, Rust, PHP, Ruby, Swift + - Automated complexity analysis from code AST without user input + - Graceful fallback to Python AST when tree-sitter unavailable + - Support for calculating estimated bugs based on Halstead volume + +- **Git Repository History Analysis** + - Commit count and contributor tracking + - Repository age calculation (days and years) + - Release/tag counting for maturity assessment + - Commit frequency analysis (commits per month) + - File churn detection (frequently modified files) + - Bus factor calculation (key contributor dependency) + - Average files changed per commit metric + +- **Maintainability Index** + - Microsoft's Maintainability Index calculation (0-100 scale) + - Combines Halstead volume, cyclomatic complexity, LOC, and comment ratio + - Automatic classification (Low/Medium/High maintainability) + - Comment ratio tracking + - Complexity per KLOC calculation + +- **Enhanced Cost Estimation** + - Maturity multiplier (1.0x - 2.5x) based on project age, contributors, and commits + - Halstead-based complexity multiplier (0.8x - 1.8x) based on code difficulty + - Improved confidence scoring incorporating all available metrics + - More accurate estimates for mature, large-scale projects + +- **Testing** + - 50+ new unit tests for all analyzers + - Comprehensive git history analyzer tests (9 tests) + - Halstead complexity analyzer tests (15 tests) + - Multi-language test coverage (JavaScript, TypeScript, Java, Go, Rust) + - Language detection tests + - Multi-language directory analysis tests + - Maintainability calculator tests (10 tests) + - Enhanced multiplier tests (13 tests) + - End-to-end integration tests (4 tests) + - Total test suite: 94 tests, all passing + +### Changed +- COCOMO II estimator now incorporates maturity and Halstead multipliers +- SLOCCount estimator updated with new multiplier support +- Cost estimates now include maturity_multiplier and halstead_multiplier 
fields +- Confidence scores adjusted to account for additional metrics +- Dependencies: Multi-language support (tree-sitter) now optional via `pip install ossval[multilang]` + - Available on Python 3.10-3.12 only (tree-sitter-languages doesn't support 3.13 yet) + - Python-only Halstead analysis still available on all Python versions via built-in AST + - Graceful fallback when tree-sitter not installed + +### Improved +- More accurate cost estimates for mature projects with extensive history +- Better handling of complex codebases through Halstead metrics +- Realistic valuation for framework-level projects (e.g., ReactJS) + ## [1.0.1] - 2025-11-29 ### Added @@ -98,5 +158,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Efficient SLOC counting with pygount - Optimized repository URL discovery -[Unreleased]: https://github.com/SemClone/ossval/compare/v1.0.1...HEAD +[Unreleased]: https://github.com/SemClone/ossval/compare/v1.2.1...HEAD +[1.2.1]: https://github.com/SemClone/ossval/compare/v1.0.1...v1.2.1 [1.0.1]: https://github.com/SemClone/ossval/releases/tag/v1.0.1 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a70de3a..d5479de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ossval" -version = "1.0.1" +version = "1.2.1" description = "Open Source Software Valuation - Calculate development cost savings from OSS dependencies" readme = "README.md" requires-python = ">=3.10" @@ -21,6 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Quality Assurance", ] @@ -39,6 +40,11 @@ dependencies = [ ] [project.optional-dependencies] +# Multi-language Halstead support (Python 3.10-3.12 only) +multilang = [ + 
"tree-sitter>=0.21.0,<0.22.0; python_version < '3.13'", + "tree-sitter-languages>=1.10.0; python_version < '3.13'", +] dev = [ "pytest>=7.0.0", "pytest-asyncio>=0.21.0", diff --git a/src/ossval/__init__.py b/src/ossval/__init__.py index 93a6db0..a71abc0 100644 --- a/src/ossval/__init__.py +++ b/src/ossval/__init__.py @@ -1,6 +1,6 @@ """OSSVAL: Open Source Software Valuation Tool.""" -__version__ = "1.0.1" +__version__ = "1.2.1" from ossval.core import analyze, parse_sbom, quick_estimate from ossval.models import AnalysisConfig, AnalysisResult, Region, ProjectType diff --git a/src/ossval/analyzers/__init__.py b/src/ossval/analyzers/__init__.py index 7762717..b320ead 100644 --- a/src/ossval/analyzers/__init__.py +++ b/src/ossval/analyzers/__init__.py @@ -1,7 +1,10 @@ """Code analysis modules.""" from ossval.analyzers.complexity import analyze_complexity, get_complexity_level +from ossval.analyzers.git_history import analyze_git_history +from ossval.analyzers.halstead import analyze_directory_halstead from ossval.analyzers.health import analyze_health +from ossval.analyzers.maintainability import calculate_maintainability_index from ossval.analyzers.repo_finder import find_repository_url from ossval.analyzers.sloc import analyze_sloc @@ -11,5 +14,8 @@ "analyze_complexity", "get_complexity_level", "analyze_health", + "analyze_git_history", + "analyze_directory_halstead", + "calculate_maintainability_index", ] diff --git a/src/ossval/analyzers/git_history.py b/src/ossval/analyzers/git_history.py new file mode 100644 index 0000000..0ff49e7 --- /dev/null +++ b/src/ossval/analyzers/git_history.py @@ -0,0 +1,287 @@ +"""Git history metrics analyzer.""" + +import asyncio +import re +from datetime import datetime, timedelta +from pathlib import Path +from typing import Dict, List, Optional + +from ossval.models import GitHistoryMetrics + + +async def analyze_git_history( + repo_path: Path, use_cache: bool = True +) -> Optional[GitHistoryMetrics]: + """ + Analyze git repository 
async def analyze_git_history(
    repo_path: Path, use_cache: bool = True
) -> Optional[GitHistoryMetrics]:
    """
    Analyze git repository history for maturity and scale metrics.

    Every metric comes from a read-only ``git`` query executed inside
    *repo_path*. The individual query helpers fall back to neutral values
    (0, an empty dict, or None) when their query fails, so a partially
    readable repository still produces a result.

    Args:
        repo_path: Path to git repository (must contain a ``.git`` entry)
        use_cache: Accepted for interface compatibility; this function does
            not consult any cache.

    Returns:
        GitHistoryMetrics if successful, None otherwise (not a git
        repository, or the metrics object could not be built)
    """
    if not (repo_path / ".git").exists():
        return None

    try:
        # The queries do not depend on each other, so the git subprocesses
        # are run concurrently instead of being awaited one after another.
        (
            commit_count,
            contributors,
            first_commit_date,
            last_commit_date,
            release_count,
            recent_commits,
            file_churn,
            avg_files_per_commit,
        ) = await asyncio.gather(
            _get_commit_count(repo_path),
            _get_contributors(repo_path),
            _get_first_commit_date(repo_path),
            _get_last_commit_date(repo_path),
            _get_release_count(repo_path),
            _get_recent_commit_count(repo_path, days=365),
            _get_file_churn(repo_path),
            _get_avg_files_per_commit(repo_path),
        )

        # Repository age is the span between first and last commit. Author
        # dates are not guaranteed to be monotonic, so never report a
        # negative age.
        age_days = 0
        age_years = 0.0
        if first_commit_date and last_commit_date:
            age_days = max(0, (last_commit_date - first_commit_date).days)
            age_years = age_days / 365.25

        return GitHistoryMetrics(
            commit_count=commit_count,
            contributor_count=len(contributors),
            age_days=age_days,
            age_years=age_years,
            first_commit_date=first_commit_date,
            last_commit_date=last_commit_date,
            release_count=release_count,
            # Commits of the last 365 days averaged over 12 months.
            commits_per_month=recent_commits / 12.0,
            avg_files_per_commit=avg_files_per_commit,
            high_churn_files=file_churn,
            # Smallest group of top contributors covering 50% of commits.
            bus_factor=_calculate_bus_factor(contributors, commit_count),
        )

    except Exception:
        # Best-effort analysis: the caller treats None as "no git metrics".
        return None
async def _get_first_commit_date(repo_path: Path) -> Optional[datetime]:
    """Get date of first commit.

    Args:
        repo_path: Path to git repository

    Returns:
        Author date of the earliest root commit reachable from HEAD, or
        None when the history cannot be read (not a repository, no
        commits, git unavailable).
    """
    try:
        # `git log --reverse --max-count=1` cannot be used here: git applies
        # the commit limit first and reverses afterwards, so it prints the
        # NEWEST commit. Root commits (no parents) are selected instead; a
        # history may have several roots, so the earliest one is taken.
        proc = await asyncio.create_subprocess_exec(
            "git",
            "log",
            "--max-parents=0",
            "--format=%aI",
            cwd=repo_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        root_dates = [
            datetime.fromisoformat(line.strip().replace("Z", "+00:00"))
            for line in stdout.decode().splitlines()
            if line.strip()
        ]
        if root_dates:
            return min(root_dates)
    except Exception:
        # Best effort: any failure means "date unknown".
        pass
    return None
async def _get_file_churn(repo_path: Path, top_n: int = 10) -> int:
    """Get count of high-churn files (changed frequently).

    A file is high-churn when it is listed by more than 10 commits in the
    output of ``git log --since=1.year``.

    Args:
        repo_path: Path to git repository
        top_n: Currently unused; kept so existing callers keep working.

    Returns:
        Number of high-churn files, or 0 when the history cannot be read
    """
    from collections import Counter  # local import: only this helper needs it

    churn_threshold = 10

    try:
        proc = await asyncio.create_subprocess_exec(
            "git",
            "log",
            "--format=",
            "--name-only",
            "--since=1.year",
            cwd=repo_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()

        # errors="replace": one path that is not valid UTF-8 must not
        # discard the whole metric.
        listed_paths = (
            line.strip()
            for line in stdout.decode(errors="replace").splitlines()
        )
        change_counts = Counter(path for path in listed_paths if path)

        return sum(1 for count in change_counts.values() if count > churn_threshold)
    except Exception:
        # Best effort: git missing, bad working directory, etc.
        return 0
def _calculate_bus_factor(contributors: Dict[str, int], total_commits: int) -> int:
    """
    Calculate bus factor: how many of the most active contributors are
    needed to reach half of all commits.

    Args:
        contributors: Dict mapping contributor name to commit count
        total_commits: Total commit count

    Returns:
        Bus factor, never less than 1. When the listed contributors never
        reach half of ``total_commits``, every contributor is counted.
    """
    if total_commits == 0 or not contributors:
        return 1

    half_of_commits = total_commits * 0.5

    # Walk the commit counts from the busiest contributor downwards.
    ranked_counts = sorted(contributors.values(), reverse=True)

    running_total = 0
    for people_needed, commit_share in enumerate(ranked_counts, start=1):
        running_total += commit_share
        if running_total >= half_of_commits:
            return people_needed

    return len(ranked_counts)
+OPERATOR_TYPES = { + "python": { + "binary_operator", "unary_operator", "comparison_operator", + "boolean_operator", "augmented_assignment", "attribute", + "subscript", "call", "return_statement", "if_statement", + "for_statement", "while_statement", "with_statement", + "try_statement", "raise_statement", "assert_statement", + "import_statement", "import_from_statement", "class_definition", + "function_definition", "lambda", + }, + "javascript": { + "binary_expression", "unary_expression", "update_expression", + "assignment_expression", "call_expression", "member_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "try_statement", "throw_statement", "function_declaration", + "class_declaration", "arrow_function", "new_expression", + }, + "typescript": { + "binary_expression", "unary_expression", "update_expression", + "assignment_expression", "call_expression", "member_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "try_statement", "throw_statement", "function_declaration", + "class_declaration", "arrow_function", "new_expression", + "interface_declaration", "type_alias_declaration", + }, + "java": { + "binary_expression", "unary_expression", "update_expression", + "assignment_expression", "method_invocation", "field_access", + "array_access", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_expression", + "try_statement", "throw_statement", "method_declaration", + "class_declaration", "constructor_declaration", + }, + "c": { + "binary_expression", "unary_expression", "update_expression", + "assignment_expression", "call_expression", "field_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "function_definition", "struct_specifier", + }, + "cpp": { + "binary_expression", 
"unary_expression", "update_expression", + "assignment_expression", "call_expression", "field_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "function_definition", "class_specifier", "namespace_definition", + }, + "c_sharp": { + "binary_expression", "prefix_unary_expression", "postfix_unary_expression", + "assignment_expression", "invocation_expression", "member_access_expression", + "element_access_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "try_statement", "throw_statement", "method_declaration", + "class_declaration", "interface_declaration", + }, + "go": { + "binary_expression", "unary_expression", "assignment_statement", + "call_expression", "selector_expression", "index_expression", + "return_statement", "if_statement", "for_statement", + "switch_statement", "function_declaration", "method_declaration", + "type_declaration", "struct_type", + }, + "rust": { + "binary_expression", "unary_expression", "assignment_expression", + "call_expression", "field_expression", "index_expression", + "return_expression", "if_expression", "loop_expression", + "for_expression", "while_expression", "match_expression", + "function_item", "impl_item", "trait_item", "struct_item", + }, + "php": { + "binary_expression", "unary_op_expression", "assignment_expression", + "function_call_expression", "member_access_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "try_statement", "throw_statement", "function_definition", + "class_declaration", "method_declaration", + }, + "ruby": { + "binary", "unary", "assignment", "call", "element_reference", + "return", "if", "unless", "while", "until", "for", + "case", "begin", "method", "class", "module", + }, + "swift": { + "binary_expression", "prefix_expression", "postfix_expression", + "assignment", 
"call_expression", "navigation_expression", + "subscript_expression", "return_statement", "if_statement", + "for_statement", "while_statement", "switch_statement", + "do_statement", "throw_statement", "function_declaration", + "class_declaration", "protocol_declaration", + }, +} + +OPERAND_TYPES = { + "python": {"identifier", "integer", "float", "string", "true", "false", "none"}, + "javascript": {"identifier", "number", "string", "true", "false", "null", "undefined"}, + "typescript": {"identifier", "number", "string", "true", "false", "null", "undefined"}, + "java": {"identifier", "decimal_integer_literal", "string_literal", "true", "false", "null_literal"}, + "c": {"identifier", "number_literal", "string_literal", "char_literal"}, + "cpp": {"identifier", "number_literal", "string_literal", "char_literal", "true", "false"}, + "c_sharp": {"identifier", "integer_literal", "string_literal", "character_literal", "true", "false", "null"}, + "go": {"identifier", "int_literal", "float_literal", "string_literal", "rune_literal", "true", "false", "nil"}, + "rust": {"identifier", "integer_literal", "float_literal", "string_literal", "char_literal", "true", "false"}, + "php": {"name", "integer", "float", "string", "true", "false", "null"}, + "ruby": {"identifier", "integer", "float", "string", "symbol", "true", "false", "nil"}, + "swift": {"simple_identifier", "integer_literal", "real_literal", "string_literal", "true", "false", "nil"}, +} + + +class PythonHalsteadAnalyzer(ast.NodeVisitor): + """AST visitor to compute Halstead metrics for Python code (fallback).""" + + def __init__(self): + """Initialize the analyzer.""" + self.operators: Set[str] = set() + self.operands: Set[str] = set() + self.operator_count = 0 + self.operand_count = 0 + + def visit_BinOp(self, node): + """Visit binary operators.""" + self.operators.add(node.op.__class__.__name__) + self.operator_count += 1 + self.generic_visit(node) + + def visit_UnaryOp(self, node): + """Visit unary operators.""" + 
class PythonHalsteadAnalyzer(ast.NodeVisitor):
    """AST visitor to compute Halstead metrics for Python code (fallback).

    After ``visit(tree)`` the instance exposes:

    * ``operators`` / ``operands``: sets of the distinct operator and
      operand labels seen (their sizes are Halstead's n1 and n2)
    * ``operator_count`` / ``operand_count``: total occurrences (N1 and N2)
    """

    def __init__(self):
        """Initialize the analyzer."""
        self.operators: Set[str] = set()
        self.operands: Set[str] = set()
        self.operator_count = 0
        self.operand_count = 0

    def _record_operator(self, node, label):
        """Count one occurrence of operator *label*, then descend into *node*."""
        self.operators.add(label)
        self.operator_count += 1
        self.generic_visit(node)

    def _record_operand(self, node, label):
        """Count one occurrence of operand *label*, then descend into *node*."""
        self.operands.add(label)
        self.operand_count += 1
        self.generic_visit(node)

    def visit_BinOp(self, node):
        """Visit binary operators."""
        self._record_operator(node, type(node.op).__name__)

    def visit_UnaryOp(self, node):
        """Visit unary operators."""
        self._record_operator(node, type(node.op).__name__)

    def visit_Compare(self, node):
        """Visit comparison operators (a chained comparison has several)."""
        for op in node.ops:
            self.operators.add(type(op).__name__)
            self.operator_count += 1
        self.generic_visit(node)

    def visit_BoolOp(self, node):
        """Visit boolean operators."""
        self._record_operator(node, type(node.op).__name__)

    def visit_Call(self, node):
        """Visit function calls."""
        self._record_operator(node, "Call")

    def visit_Assign(self, node):
        """Visit assignments."""
        self._record_operator(node, "Assign")

    def visit_AugAssign(self, node):
        """Visit augmented assignments."""
        self._record_operator(node, f"AugAssign_{type(node.op).__name__}")

    def visit_If(self, node):
        """Visit if statements."""
        self._record_operator(node, "If")

    def visit_For(self, node):
        """Visit for loops."""
        self._record_operator(node, "For")

    def visit_While(self, node):
        """Visit while loops."""
        self._record_operator(node, "While")

    def visit_Return(self, node):
        """Visit return statements."""
        self._record_operator(node, "Return")

    def visit_FunctionDef(self, node):
        """Visit function definitions."""
        self._record_operator(node, "FunctionDef")

    def visit_ClassDef(self, node):
        """Visit class definitions."""
        self._record_operator(node, "ClassDef")

    # Async variants are counted under the same operator labels as their
    # synchronous counterparts instead of being skipped.
    visit_AsyncFunctionDef = visit_FunctionDef
    visit_AsyncFor = visit_For

    def visit_Name(self, node):
        """Visit variable names."""
        self._record_operand(node, node.id)

    def visit_Constant(self, node):
        """Visit constants.

        ``repr`` keeps constants of different types apart: ``1`` and
        ``"1"`` (or ``None`` and ``"None"``) are distinct operands, which
        ``str`` would merge into one.
        """
        self._record_operand(node, repr(node.value))

    def visit_Attribute(self, node):
        """Visit attribute access."""
        self._record_operand(node, node.attr)
def analyze_with_tree_sitter(file_path: Path, language: str) -> Optional[HalsteadMetrics]:
    """
    Analyze file using tree-sitter.

    Every syntax node whose type is listed in ``OPERATOR_TYPES[language]``
    counts as an operator occurrence (distinct by node type); every node
    whose type is listed in ``OPERAND_TYPES[language]`` counts as an operand
    occurrence (distinct by its source text, truncated to 50 characters).

    Args:
        file_path: Path to source file
        language: Programming language

    Returns:
        HalsteadMetrics, or None when tree-sitter is unavailable, the file
        cannot be parsed, or no operators/operands were found
    """
    if not TREE_SITTER_AVAILABLE:
        return None

    import math  # local import: the module does not import math at top level

    try:
        parser = tsl.get_parser(language)
        with open(file_path, "rb") as f:
            source_code = f.read()

        tree = parser.parse(source_code)

        operator_types = OPERATOR_TYPES.get(language, set())
        operand_types = OPERAND_TYPES.get(language, set())

        operators: Set[str] = set()
        operands: Set[str] = set()
        operator_count = 0
        operand_count = 0

        # Explicit stack instead of recursion: deeply nested sources would
        # otherwise exceed the interpreter's recursion limit. Visit order
        # does not matter because only counts and sets are collected.
        pending = [tree.root_node]
        while pending:
            node = pending.pop()

            if node.type in operator_types:
                operators.add(node.type)
                operator_count += 1

            if node.type in operand_types:
                operand_text = source_code[node.start_byte:node.end_byte].decode(
                    "utf-8", errors="ignore"
                )
                operands.add(operand_text[:50])  # Limit length
                operand_count += 1

            pending.extend(node.children)

        n1 = len(operators)
        n2 = len(operands)
        N1 = operator_count
        N2 = operand_count

        if n1 == 0 or n2 == 0:
            return None

        # Standard Halstead definitions (n1, n2 >= 1 is guaranteed here).
        vocabulary = n1 + n2
        length = N1 + N2
        calculated_length = n1 * math.log2(n1) + n2 * math.log2(n2)
        volume = length * math.log2(vocabulary)
        difficulty = (n1 / 2.0) * (N2 / n2)
        effort = difficulty * volume
        time_seconds = effort / 18.0
        bugs = volume / 3000.0

        return HalsteadMetrics(
            vocabulary=vocabulary,
            length=length,
            calculated_length=calculated_length,
            volume=volume,
            difficulty=difficulty,
            effort=effort,
            time_seconds=time_seconds,
            bugs=bugs,
        )

    except Exception:
        # Best effort: unsupported grammar, unreadable file, parser error.
        return None
def analyze_python_file_fallback(file_path: Path) -> Optional[HalsteadMetrics]:
    """
    Analyze Python file using built-in AST (fallback).

    Args:
        file_path: Path to Python file

    Returns:
        HalsteadMetrics, or None when the file cannot be read or parsed, or
        contains no operators/operands
    """
    import math  # local import: the module does not import math at top level

    try:
        # Bytes are handed to ast.parse so that a PEP 263 coding cookie or a
        # BOM is honoured instead of forcing UTF-8.
        with open(file_path, "rb") as f:
            source = f.read()

        tree = ast.parse(source)
        analyzer = PythonHalsteadAnalyzer()
        analyzer.visit(tree)

        n1 = len(analyzer.operators)
        n2 = len(analyzer.operands)
        N1 = analyzer.operator_count
        N2 = analyzer.operand_count

        if n1 == 0 or n2 == 0:
            return None

        # Standard Halstead definitions (n1, n2 >= 1 is guaranteed here).
        vocabulary = n1 + n2
        length = N1 + N2
        calculated_length = n1 * math.log2(n1) + n2 * math.log2(n2)
        volume = length * math.log2(vocabulary)
        difficulty = (n1 / 2.0) * (N2 / n2)
        effort = difficulty * volume
        time_seconds = effort / 18.0
        bugs = volume / 3000.0

        return HalsteadMetrics(
            vocabulary=vocabulary,
            length=length,
            calculated_length=calculated_length,
            volume=volume,
            difficulty=difficulty,
            effort=effort,
            time_seconds=time_seconds,
            bugs=bugs,
        )

    except Exception:
        # Best effort: unreadable file or source that does not parse.
        return None
def analyze_source_file(file_path: Path) -> Optional[HalsteadMetrics]:
    """
    Compute Halstead metrics for one source file in any supported language.

    Args:
        file_path: Path to source file

    Returns:
        HalsteadMetrics, or None when the extension is not recognised, or
        the language is not Python and tree-sitter is not installed
    """
    language = detect_language(file_path)

    # Python has its own entry point, which can fall back to the built-in AST.
    if language == "python":
        return analyze_python_file(file_path)

    # Every other recognised language can only be parsed by tree-sitter.
    if language and TREE_SITTER_AVAILABLE:
        return analyze_with_tree_sitter(file_path, language)

    return None
def analyze_directory_halstead(repo_path: Path) -> Optional[HalsteadMetrics]:
    """
    Analyze all supported source files in a directory for aggregate Halstead metrics.

    Volume, effort, time and bugs are summed over the analyzed files;
    difficulty is averaged. Files located below a directory named ``venv``,
    ``.venv``, ``node_modules``, ``.git``, ``build``, ``dist`` or ``target``
    (relative to *repo_path*) are ignored.

    Args:
        repo_path: Path to repository

    Returns:
        Aggregated HalsteadMetrics, or None when no file could be analyzed
    """
    # Directory names are compared as whole path components. A substring
    # test on the full path would also drop files such as "builder.py" or
    # "distance.py", or a whole checkout located under ".../my-venv-cache/".
    skipped_dirs = {"venv", ".venv", "node_modules", ".git", "build", "dist", "target"}

    supported_suffixes = {
        ext for extensions in LANGUAGE_EXTENSIONS.values() for ext in extensions
    }

    total_volume = 0.0
    total_difficulty = 0.0
    total_effort = 0.0
    total_time = 0.0
    total_bugs = 0.0
    file_count = 0

    # A single walk of the tree instead of one full walk per extension.
    for source_file in repo_path.rglob("*"):
        if source_file.suffix.lower() not in supported_suffixes:
            continue

        # Only directories below repo_path count, never its own location.
        enclosing_dirs = source_file.relative_to(repo_path).parts[:-1]
        if not skipped_dirs.isdisjoint(enclosing_dirs):
            continue

        metrics = analyze_source_file(source_file)
        if not metrics:
            continue

        total_volume += metrics.volume
        total_difficulty += metrics.difficulty
        total_effort += metrics.effort
        total_time += metrics.time_seconds
        total_bugs += metrics.bugs
        file_count += 1

    if file_count == 0:
        return None

    avg_difficulty = total_difficulty / file_count

    return HalsteadMetrics(
        vocabulary=0,  # Not meaningful at aggregate level
        length=0,  # Not meaningful at aggregate level
        calculated_length=0,  # Not meaningful at aggregate level
        volume=total_volume,
        difficulty=avg_difficulty,
        effort=total_effort,
        time_seconds=total_time,
        bugs=total_bugs,
    )
def calculate_maintainability_index(
    sloc: SLOCMetrics,
    halstead: Optional[HalsteadMetrics] = None,
    complexity: Optional[ComplexityMetrics] = None,
) -> Optional[MaintainabilityMetrics]:
    """
    Calculate Maintainability Index (MI).

    Formula used (result clamped to the 0-100 range):
        MI = 171 - 5.2 × ln(HV) - 0.23 × CC - 16.2 × ln(LOC) + 50 × sin(sqrt(2.4 × CM))

    Where:
    - HV = Halstead Volume; estimated as LOC × 4.79 when no positive
      Halstead volume is supplied
    - CC = Cyclomatic Complexity (average); 10.0 when not supplied
    - LOC = Lines of Code (``sloc.code_lines``)
    - CM = Comment Ratio (comment lines / total lines, as a fraction)

    Classification: MI >= 20 is "High", MI >= 10 is "Medium", otherwise "Low".

    Args:
        sloc: SLOC metrics
        halstead: Optional Halstead metrics
        complexity: Optional complexity metrics

    Returns:
        MaintainabilityMetrics if calculable, None otherwise
    """
    # ln(LOC) needs a positive line count; treat anything else as "no code".
    if not sloc or sloc.code_lines <= 0:
        return None

    loc = sloc.code_lines
    total_lines = sloc.total

    # Calculate comment ratio
    comment_ratio = sloc.comment_lines / total_lines if total_lines > 0 else 0.0

    # Cyclomatic complexity: moderate defaults unless a real average exists
    cyclomatic_avg = 10.0
    complexity_per_kloc = 10.0
    if complexity and complexity.cyclomatic_complexity_avg:
        cyclomatic_avg = complexity.cyclomatic_complexity_avg
        complexity_per_kloc = (cyclomatic_avg / loc) * 1000

    # Measured Halstead volume when available, otherwise the empirical
    # approximation V = LOC × 4.79.
    if halstead and halstead.volume > 0:
        volume = halstead.volume
    else:
        volume = loc * 4.79

    try:
        halstead_term = 5.2 * math.log(volume)
        complexity_term = 0.23 * cyclomatic_avg
        loc_term = 16.2 * math.log(loc)
        comment_term = 50 * math.sin(math.sqrt(2.4 * comment_ratio))

        mi = 171 - halstead_term - complexity_term - loc_term + comment_term

        # NOTE(review): the raw value is clamped to 0-100, not rescaled by
        # 100/171 as in Microsoft's published variant; confirm the
        # thresholds below are meant for this scale.
        mi = max(0, min(100, mi))

        # Classify maintainability level
        if mi >= 20:
            level = "High"  # Green: highly maintainable
        elif mi >= 10:
            level = "Medium"  # Yellow: moderately maintainable
        else:
            level = "Low"  # Red: difficult to maintain

        return MaintainabilityMetrics(
            maintainability_index=mi,
            maintainability_level=level,
            comment_ratio=comment_ratio,
            avg_complexity_per_kloc=complexity_per_kloc,
        )

    except (ValueError, ZeroDivisionError):
        # e.g. a negative comment ratio makes sqrt() fail
        return None
ProjectType.LIBRARY: + # Detect project type (or use override) + if config.project_type_override: + package.project_type = config.project_type_override + package.project_type_detection = {"source": "cli_override", "confidence": 1.0} + elif package.project_type == ProjectType.LIBRARY: project_type, detection_details = detect_project_type( package.name, package.repository_url ) @@ -110,6 +116,7 @@ async def _analyze_package( package.project_type_detection = detection_details # Analyze SLOC if repository URL is available and cloning is enabled + repo_path = None if config.clone_repos and package.repository_url: cache_dir = str(cache.cache_dir) if cache else None try: @@ -121,6 +128,11 @@ async def _analyze_package( if sloc and sloc.total > 0: package.sloc = sloc package.language = _infer_language_from_sloc(sloc) + + # Store repo path for additional analysis + if cache_dir: + repo_name = package.repository_url.split("/")[-1].replace(".git", "") + repo_path = Path(cache_dir) / "repos" / repo_name elif sloc is None: # Failed to get SLOC - add warning package.warnings.append( @@ -129,6 +141,24 @@ async def _analyze_package( except Exception as e: package.warnings.append(f"Error analyzing SLOC: {str(e)}") + # Analyze Halstead metrics if we have a cloned repository + if repo_path and repo_path.exists(): + try: + halstead = analyze_directory_halstead(repo_path) + if halstead: + package.halstead = halstead + except Exception as e: + package.warnings.append(f"Error analyzing Halstead metrics: {str(e)}") + + # Analyze git history if we have a cloned repository + if repo_path and repo_path.exists(): + try: + git_history = await analyze_git_history(repo_path, use_cache=config.use_cache) + if git_history: + package.git_history = git_history + except Exception as e: + package.warnings.append(f"Error analyzing git history: {str(e)}") + # Analyze complexity (if we have code) # Note: For now, we'll use default complexity if no code is available if package.complexity is None: @@ -137,6 
+167,17 @@ async def _analyze_package( package.complexity = ComplexityMetrics(complexity_level=ComplexityLevel.MODERATE) + # Calculate maintainability index if we have SLOC + if package.sloc: + try: + maintainability = calculate_maintainability_index( + package.sloc, package.halstead, package.complexity + ) + if maintainability: + package.maintainability = maintainability + except Exception as e: + package.warnings.append(f"Error calculating maintainability index: {str(e)}") + # Analyze health metrics (GitHub only) if package.repository_url and "github.com" in package.repository_url.lower(): health = await analyze_health(package.repository_url, config.github_token) @@ -379,6 +420,7 @@ async def analyze( "region": config.region.value, "clone_repos": config.clone_repos, "methodology": config.methodology, + "project_type_override": config.project_type_override.value if config.project_type_override else None, }, }, summary={ diff --git a/src/ossval/data/multipliers.py b/src/ossval/data/multipliers.py index 4355176..c94df9d 100644 --- a/src/ossval/data/multipliers.py +++ b/src/ossval/data/multipliers.py @@ -1,6 +1,8 @@ """Multipliers for project types and complexity levels.""" -from ossval.models import ComplexityLevel, ProjectType +from typing import Optional + +from ossval.models import ComplexityLevel, GitHistoryMetrics, HalsteadMetrics, ProjectType # Project type multipliers (salary and effort) # Effort multiplier = sqrt(salary multiplier) - expertise affects salary more than raw effort @@ -40,3 +42,101 @@ def get_complexity_multiplier(complexity_level: ComplexityLevel) -> float: """Get multiplier for a complexity level.""" return COMPLEXITY_MULTIPLIERS.get(complexity_level, 1.0) + +def get_maturity_multiplier(git_history: Optional[GitHistoryMetrics]) -> float: + """ + Calculate maturity multiplier based on git history. + + Projects with long history, many commits, and many contributors + are more complex to maintain and understand. 
+ + Args: + git_history: Git history metrics + + Returns: + Multiplier between 1.0 and 2.5 + """ + if not git_history: + return 1.0 + + multiplier = 1.0 + + # Age multiplier (mature projects are more complex) + # 0-1 year: 1.0x + # 1-3 years: 1.1x + # 3-5 years: 1.2x + # 5-10 years: 1.4x + # 10+ years: 1.6x + if git_history.age_years >= 10: + multiplier += 0.6 + elif git_history.age_years >= 5: + multiplier += 0.4 + elif git_history.age_years >= 3: + multiplier += 0.2 + elif git_history.age_years >= 1: + multiplier += 0.1 + + # Contributor count multiplier (more contributors = more complexity) + # 1-5: 1.0x + # 6-20: 1.1x + # 21-50: 1.2x + # 51-100: 1.3x + # 100+: 1.5x + if git_history.contributor_count >= 100: + multiplier += 0.5 + elif git_history.contributor_count >= 51: + multiplier += 0.3 + elif git_history.contributor_count >= 21: + multiplier += 0.2 + elif git_history.contributor_count >= 6: + multiplier += 0.1 + + # Commit count multiplier (many commits = evolved complexity) + # <1000: 1.0x + # 1000-5000: 1.1x + # 5000-10000: 1.15x + # 10000-20000: 1.2x + # 20000+: 1.3x + if git_history.commit_count >= 20000: + multiplier += 0.3 + elif git_history.commit_count >= 10000: + multiplier += 0.2 + elif git_history.commit_count >= 5000: + multiplier += 0.15 + elif git_history.commit_count >= 1000: + multiplier += 0.1 + + # Cap at 2.5x for very mature/large projects + return min(multiplier, 2.5) + + +def get_halstead_multiplier(halstead: Optional[HalsteadMetrics]) -> float: + """ + Calculate complexity multiplier based on Halstead metrics. + + Higher difficulty and effort indicate more complex code. 
+ + Args: + halstead: Halstead metrics + + Returns: + Multiplier between 0.8 and 1.8 + """ + if not halstead: + return 1.0 + + # Use difficulty as primary indicator + # Low difficulty (< 10): 0.8-0.9x + # Medium difficulty (10-20): 1.0x + # High difficulty (20-40): 1.2-1.4x + # Very high difficulty (> 40): 1.5-1.8x + + if halstead.difficulty < 10: + return 0.8 + (halstead.difficulty / 100) + elif halstead.difficulty < 20: + return 0.9 + (halstead.difficulty - 10) / 100 + elif halstead.difficulty < 40: + return 1.0 + (halstead.difficulty - 20) / 50 + else: + return min(1.5 + (halstead.difficulty - 40) / 100, 1.8) + diff --git a/src/ossval/estimators/cocomo.py b/src/ossval/estimators/cocomo.py index 8277b38..58008b6 100644 --- a/src/ossval/estimators/cocomo.py +++ b/src/ossval/estimators/cocomo.py @@ -2,6 +2,8 @@ from ossval.data.multipliers import ( get_complexity_multiplier, + get_halstead_multiplier, + get_maturity_multiplier, get_project_type_multiplier, ) from ossval.data.salaries import get_monthly_rate @@ -67,6 +69,8 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: project_type=package.project_type, complexity_multiplier=1.0, project_type_multiplier=1.0, + maturity_multiplier=1.0, + halstead_multiplier=1.0, ) # Calculate KSLOC (thousands of source lines of code) @@ -83,14 +87,20 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: package.project_type, multiplier_type="effort" ) - # Calculate effort using COCOMO II formula - # Effort = a × (KSLOC)^b × EAF × Complexity_Multiplier × Project_Type_Multiplier + # Get new multipliers based on git history and Halstead metrics + maturity_multiplier = get_maturity_multiplier(package.git_history) + halstead_multiplier = get_halstead_multiplier(package.halstead) + + # Calculate effort using COCOMO II formula with new multipliers + # Effort = a × (KSLOC)^b × EAF × Complexity × ProjectType × Maturity × Halstead effort_person_months = ( self.a * (ksloc ** self.b) * self.eaf * 
complexity_multiplier * project_type_multiplier + * maturity_multiplier + * halstead_multiplier ) # Convert to person-years @@ -110,11 +120,15 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: # Calculate confidence (based on data availability) confidence = 0.5 # Base confidence if package.sloc: - confidence += 0.2 + confidence += 0.15 if package.complexity: - confidence += 0.2 - if package.health: confidence += 0.1 + if package.halstead: + confidence += 0.1 + if package.git_history: + confidence += 0.1 + if package.health: + confidence += 0.05 confidence = min(confidence, 1.0) # Calculate low and high estimates (70% and 150% of base) @@ -133,5 +147,7 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: project_type=package.project_type, complexity_multiplier=complexity_multiplier, project_type_multiplier=project_type_multiplier, + maturity_multiplier=maturity_multiplier, + halstead_multiplier=halstead_multiplier, ) diff --git a/src/ossval/estimators/sloccount.py b/src/ossval/estimators/sloccount.py index b43f1b1..b27f93b 100644 --- a/src/ossval/estimators/sloccount.py +++ b/src/ossval/estimators/sloccount.py @@ -2,6 +2,8 @@ from ossval.data.multipliers import ( get_complexity_multiplier, + get_halstead_multiplier, + get_maturity_multiplier, get_project_type_multiplier, ) from ossval.data.salaries import get_monthly_rate @@ -53,6 +55,8 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: project_type=package.project_type, complexity_multiplier=1.0, project_type_multiplier=1.0, + maturity_multiplier=1.0, + halstead_multiplier=1.0, ) # Calculate KSLOC @@ -69,9 +73,18 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: package.project_type, multiplier_type="effort" ) - # Calculate effort: Effort = a × (KSLOC)^b + # Get new multipliers based on git history and Halstead metrics + maturity_multiplier = get_maturity_multiplier(package.git_history) + halstead_multiplier = 
get_halstead_multiplier(package.halstead) + + # Calculate effort with all multipliers effort_person_months = ( - self.a * (ksloc ** self.b) * complexity_multiplier * project_type_multiplier + self.a + * (ksloc ** self.b) + * complexity_multiplier + * project_type_multiplier + * maturity_multiplier + * halstead_multiplier ) effort_person_years = effort_person_months / 12.0 @@ -90,9 +103,13 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: # Confidence confidence = 0.4 if package.sloc: - confidence += 0.3 - if package.complexity: confidence += 0.2 + if package.complexity: + confidence += 0.1 + if package.halstead: + confidence += 0.1 + if package.git_history: + confidence += 0.1 confidence = min(confidence, 1.0) cost_usd_low = base_cost * 0.7 @@ -110,5 +127,7 @@ def estimate(self, package: Package, region: Region) -> CostEstimate: project_type=package.project_type, complexity_multiplier=complexity_multiplier, project_type_multiplier=project_type_multiplier, + maturity_multiplier=maturity_multiplier, + halstead_multiplier=halstead_multiplier, ) diff --git a/src/ossval/models.py b/src/ossval/models.py index e3a661e..e7d16ab 100644 --- a/src/ossval/models.py +++ b/src/ossval/models.py @@ -113,6 +113,48 @@ class ComplexityMetrics(BaseModel): ) +class HalsteadMetrics(BaseModel): + """Halstead complexity metrics.""" + + vocabulary: int = Field(description="n = n1 + n2 (unique operators + operands)") + length: int = Field(description="N = N1 + N2 (total operators + operands)") + calculated_length: float = Field(description="Estimated length") + volume: float = Field(description="Program volume (N × log2(n))") + difficulty: float = Field(description="Program difficulty") + effort: float = Field(description="Effort to implement/understand") + time_seconds: float = Field(description="Time required to program (seconds)") + bugs: float = Field(description="Estimated number of bugs (volume / 3000)") + + +class GitHistoryMetrics(BaseModel): + """Git 
repository history metrics.""" + + commit_count: int = Field(description="Total number of commits") + contributor_count: int = Field(description="Total number of contributors") + age_days: int = Field(description="Repository age in days") + age_years: float = Field(description="Repository age in years") + first_commit_date: Optional[datetime] = Field(None, description="Date of first commit") + last_commit_date: Optional[datetime] = Field(None, description="Date of last commit") + release_count: int = Field(description="Number of releases/tags") + commits_per_month: float = Field(description="Average commits per month (last year)") + avg_files_per_commit: float = Field(description="Average files changed per commit") + high_churn_files: int = Field(description="Number of frequently changed files") + bus_factor: int = Field(description="Minimum contributors for 50% of commits") + + +class MaintainabilityMetrics(BaseModel): + """Maintainability Index and derived metrics.""" + + maintainability_index: float = Field( + ge=0.0, le=100.0, description="Maintainability Index (0-100, higher is better)" + ) + maintainability_level: str = Field(description="Low/Medium/High maintainability") + comment_ratio: float = Field(description="Ratio of comment lines to code lines") + avg_complexity_per_kloc: float = Field( + description="Average cyclomatic complexity per 1000 LOC" + ) + + class HealthMetrics(BaseModel): """Repository health metrics from GitHub API.""" @@ -147,6 +189,12 @@ class CostEstimate(BaseModel): project_type: ProjectType = Field(description="Project type classification") complexity_multiplier: float = Field(description="Complexity multiplier applied") project_type_multiplier: float = Field(description="Project type multiplier applied") + maturity_multiplier: float = Field( + default=1.0, description="Maturity/scale multiplier from git history" + ) + halstead_multiplier: float = Field( + default=1.0, description="Halstead complexity multiplier" + ) class 
Package(BaseModel): @@ -165,6 +213,11 @@ class Package(BaseModel): repository_url: Optional[str] = Field(None, description="Source repository URL") sloc: Optional[SLOCMetrics] = Field(None, description="SLOC metrics") complexity: Optional[ComplexityMetrics] = Field(None, description="Complexity metrics") + halstead: Optional[HalsteadMetrics] = Field(None, description="Halstead complexity metrics") + maintainability: Optional[MaintainabilityMetrics] = Field( + None, description="Maintainability metrics" + ) + git_history: Optional[GitHistoryMetrics] = Field(None, description="Git history metrics") health: Optional[HealthMetrics] = Field(None, description="Health metrics") cost_estimate: Optional[CostEstimate] = Field(None, description="Cost estimate") is_critical: bool = Field( @@ -194,6 +247,7 @@ class AnalysisConfig(BaseModel): methodology: str = Field("cocomo2", description="Cost estimation methodology") verbose: bool = Field(False, description="Verbose output") quiet: bool = Field(False, description="Quiet mode (minimal output)") + project_type_override: Optional[ProjectType] = Field(None, description="Override project type detection") class AnalysisResult(BaseModel): diff --git a/tests/test_analyzers/test_git_history.py b/tests/test_analyzers/test_git_history.py new file mode 100644 index 0000000..8a054c7 --- /dev/null +++ b/tests/test_analyzers/test_git_history.py @@ -0,0 +1,289 @@ +"""Tests for git history analyzer.""" + +import asyncio +import subprocess +import tempfile +from datetime import datetime, timedelta +from pathlib import Path + +import pytest + +from ossval.analyzers.git_history import analyze_git_history + + +def create_test_git_repo(path: Path, num_commits: int = 5, num_contributors: int = 2): + """Helper to create a test git repository.""" + # Initialize git repo + subprocess.run(["git", "init"], cwd=path, check=True, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@example.com"], + cwd=path, + check=True, + 
capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test User"], + cwd=path, + check=True, + capture_output=True, + ) + + # Create commits + for i in range(num_commits): + # Alternate between contributors + if i % 2 == 0: + user_name = "Test User" + user_email = "test@example.com" + else: + user_name = "Second User" + user_email = "second@example.com" + + subprocess.run( + ["git", "config", "user.name", user_name], + cwd=path, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.email", user_email], + cwd=path, + check=True, + capture_output=True, + ) + + # Create/modify file + test_file = path / f"file{i}.txt" + test_file.write_text(f"Content {i}") + + subprocess.run(["git", "add", "."], cwd=path, check=True, capture_output=True) + subprocess.run( + ["git", "commit", "-m", f"Commit {i}"], + cwd=path, + check=True, + capture_output=True, + ) + + +@pytest.mark.asyncio +async def test_analyze_basic_git_repo(): + """Test analyzing a basic git repository.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=5) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + assert metrics.commit_count == 5 + assert metrics.contributor_count >= 1 + assert metrics.age_days >= 0 + assert metrics.age_years >= 0.0 + assert metrics.first_commit_date is not None + assert metrics.last_commit_date is not None + assert metrics.release_count >= 0 + assert metrics.bus_factor >= 1 + + +@pytest.mark.asyncio +async def test_analyze_repo_with_multiple_contributors(): + """Test analyzing a repo with multiple contributors.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=10, num_contributors=2) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + assert metrics.commit_count == 10 + # Should have at least 2 contributors (we alternate) + assert 
metrics.contributor_count >= 1 + + +@pytest.mark.asyncio +async def test_analyze_repo_with_tags(): + """Test analyzing a repo with release tags.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=3) + + # Create tags + subprocess.run( + ["git", "tag", "v1.0.0"], + cwd=tmppath, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "tag", "v1.1.0"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + assert metrics.release_count == 2 + + +@pytest.mark.asyncio +async def test_analyze_non_git_directory(): + """Test analyzing a directory that's not a git repo.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + # Don't initialize git + + metrics = await analyze_git_history(tmppath) + + # Should return None for non-git directory + assert metrics is None + + +@pytest.mark.asyncio +async def test_analyze_commits_per_month(): + """Test that commits per month is calculated.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=12) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + # Should have calculated commits per month + assert metrics.commits_per_month >= 0.0 + + +@pytest.mark.asyncio +async def test_analyze_avg_files_per_commit(): + """Test that average files per commit is calculated.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=5) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + assert metrics.avg_files_per_commit >= 0.0 + + +@pytest.mark.asyncio +async def test_bus_factor_calculation(): + """Test bus factor calculation.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create repo with one dominant contributor + subprocess.run(["git", 
"init"], cwd=tmppath, check=True, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "main@example.com"], + cwd=tmppath, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Main User"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + # Main user makes 90% of commits + for i in range(9): + (tmppath / f"file{i}.txt").write_text(f"Content {i}") + subprocess.run( + ["git", "add", "."], cwd=tmppath, check=True, capture_output=True + ) + subprocess.run( + ["git", "commit", "-m", f"Commit {i}"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + # Second user makes 10% of commits + subprocess.run( + ["git", "config", "user.name", "Minor User"], + cwd=tmppath, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.email", "minor@example.com"], + cwd=tmppath, + check=True, + capture_output=True, + ) + (tmppath / "file10.txt").write_text("Content 10") + subprocess.run(["git", "add", "."], cwd=tmppath, check=True, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "Commit 10"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + # Bus factor should be 1 (one person does >50% of work) + assert metrics.bus_factor == 1 + + +@pytest.mark.asyncio +async def test_repository_age_calculation(): + """Test that repository age is calculated correctly.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + create_test_git_repo(tmppath, num_commits=2) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + assert metrics.first_commit_date is not None + assert metrics.last_commit_date is not None + # Commits were just created, so age should be very small + assert metrics.age_days >= 0 + assert metrics.age_years >= 0.0 + # Should be less than 1 day old + assert metrics.age_days < 1 + + +@pytest.mark.asyncio +async def 
test_high_churn_files(): + """Test identification of high-churn files.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + subprocess.run(["git", "init"], cwd=tmppath, check=True, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@example.com"], + cwd=tmppath, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test User"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + # Create a file and modify it many times + test_file = tmppath / "frequently_changed.txt" + for i in range(15): + test_file.write_text(f"Version {i}") + subprocess.run( + ["git", "add", "."], cwd=tmppath, check=True, capture_output=True + ) + subprocess.run( + ["git", "commit", "-m", f"Update {i}"], + cwd=tmppath, + check=True, + capture_output=True, + ) + + metrics = await analyze_git_history(tmppath) + + assert metrics is not None + # Should detect at least one high-churn file (>10 changes) + assert metrics.high_churn_files >= 1 diff --git a/tests/test_analyzers/test_halstead.py b/tests/test_analyzers/test_halstead.py new file mode 100644 index 0000000..c9c3401 --- /dev/null +++ b/tests/test_analyzers/test_halstead.py @@ -0,0 +1,450 @@ +"""Tests for Halstead complexity analyzer.""" + +import tempfile +from pathlib import Path + +import pytest + +from ossval.analyzers.halstead import ( + TREE_SITTER_AVAILABLE, + analyze_directory_halstead, + analyze_python_file, + analyze_source_file, + detect_language, +) + + +def test_analyze_simple_python_file(): + """Test Halstead analysis on a simple Python file.""" + code = """ +def add(a, b): + return a + b + +def multiply(x, y): + result = x * y + return result + +if __name__ == "__main__": + result = add(5, 3) + print(result) +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_python_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 0 + 
assert metrics.length > 0 + assert metrics.volume > 0 + assert metrics.difficulty > 0 + assert metrics.effort > 0 + assert metrics.time_seconds > 0 + assert metrics.bugs >= 0 + + +def test_analyze_complex_python_file(): + """Test Halstead analysis on a more complex Python file.""" + code = """ +class Calculator: + def __init__(self): + self.history = [] + + def add(self, a, b): + result = a + b + self.history.append(("add", a, b, result)) + return result + + def subtract(self, a, b): + result = a - b + self.history.append(("subtract", a, b, result)) + return result + + def multiply(self, a, b): + result = a * b + self.history.append(("multiply", a, b, result)) + return result + + def divide(self, a, b): + if b == 0: + raise ValueError("Cannot divide by zero") + result = a / b + self.history.append(("divide", a, b, result)) + return result + + def get_history(self): + return self.history +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_python_file(Path(f.name)) + + assert metrics is not None + # More complex code should have higher difficulty + assert metrics.difficulty > 5.0 + assert metrics.volume > 100.0 + + +def test_analyze_empty_file(): + """Test Halstead analysis on an empty file.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("") + f.flush() + + metrics = analyze_python_file(Path(f.name)) + + # Empty file should return None + assert metrics is None + + +def test_analyze_invalid_syntax(): + """Test Halstead analysis on file with invalid syntax.""" + code = "def broken(:\n pass" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_python_file(Path(f.name)) + + # Tree-sitter is resilient to syntax errors and can still parse + # Without tree-sitter, Python AST fails on invalid syntax + if TREE_SITTER_AVAILABLE: + assert metrics is not None + assert 
metrics.vocabulary > 0 + else: + assert metrics is None + + +def test_analyze_directory(): + """Test Halstead analysis on a directory of Python files.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create multiple Python files + file1 = tmppath / "file1.py" + file1.write_text(""" +def func1(): + x = 1 + 2 + return x +""") + + file2 = tmppath / "file2.py" + file2.write_text(""" +def func2(a, b): + return a * b + a / b +""") + + file3 = tmppath / "file3.py" + file3.write_text(""" +class MyClass: + def method(self): + return "hello" +""") + + metrics = analyze_directory_halstead(tmppath) + + assert metrics is not None + assert metrics.volume > 0 + assert metrics.effort > 0 + assert metrics.difficulty > 0 + + +def test_analyze_directory_no_python_files(): + """Test Halstead analysis on directory with no Python files.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create a non-Python file + (tmppath / "readme.txt").write_text("Not Python") + + metrics = analyze_directory_halstead(tmppath) + + # No Python files should return None + assert metrics is None + + +def test_analyze_directory_skips_venv(): + """Test that analysis skips venv directories.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create a regular Python file with some content + (tmppath / "main.py").write_text(""" +def main(): + x = 1 + 2 + return x +""") + + # Create files in venv (should be skipped) + venv_dir = tmppath / "venv" + venv_dir.mkdir() + (venv_dir / "lib.py").write_text(""" +def venv_func(): + y = 10 * 20 + return y +""") + + metrics = analyze_directory_halstead(tmppath) + + # Should analyze only main.py, not venv + assert metrics is not None + # If venv was included, volume would be higher + assert metrics.volume > 0 + + +def test_halstead_metrics_calculations(): + """Test that Halstead metrics are calculated correctly.""" + code = """ +def simple(x): + return x + 1 +""" + + with 
tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_python_file(Path(f.name)) + + assert metrics is not None + + # Basic sanity checks for metric relationships + assert metrics.length >= metrics.vocabulary # N >= n + assert metrics.effort == metrics.difficulty * metrics.volume + assert metrics.time_seconds == metrics.effort / 18.0 # Stroud number + assert metrics.bugs == metrics.volume / 3000.0 + + +def test_detect_language(): + """Test language detection from file extensions.""" + assert detect_language(Path("test.py")) == "python" + assert detect_language(Path("test.js")) == "javascript" + assert detect_language(Path("test.ts")) == "typescript" + assert detect_language(Path("test.tsx")) == "typescript" + assert detect_language(Path("test.java")) == "java" + assert detect_language(Path("test.c")) == "c" + assert detect_language(Path("test.cpp")) == "cpp" + assert detect_language(Path("test.cs")) == "c_sharp" + assert detect_language(Path("test.go")) == "go" + assert detect_language(Path("test.rs")) == "rust" + assert detect_language(Path("test.php")) == "php" + assert detect_language(Path("test.rb")) == "ruby" + assert detect_language(Path("test.swift")) == "swift" + assert detect_language(Path("test.txt")) is None + assert detect_language(Path("test.md")) is None + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_javascript_file(): + """Test Halstead analysis on a JavaScript file.""" + code = """ +function add(a, b) { + return a + b; +} + +function multiply(x, y) { + const result = x * y; + return result; +} + +if (typeof module !== 'undefined') { + const result = add(5, 3); + console.log(result); +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".js", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_source_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 
0 + assert metrics.length > 0 + assert metrics.volume > 0 + assert metrics.difficulty > 0 + assert metrics.effort > 0 + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_typescript_file(): + """Test Halstead analysis on a TypeScript file.""" + code = """ +interface Calculator { + add(a: number, b: number): number; + multiply(a: number, b: number): number; +} + +class SimpleCalculator implements Calculator { + add(a: number, b: number): number { + return a + b; + } + + multiply(a: number, b: number): number { + const result = a * b; + return result; + } +} + +const calc = new SimpleCalculator(); +const sum = calc.add(5, 3); +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".ts", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_source_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 0 + assert metrics.volume > 0 + assert metrics.difficulty > 0 + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_java_file(): + """Test Halstead analysis on a Java file.""" + code = """ +public class Calculator { + public int add(int a, int b) { + return a + b; + } + + public int multiply(int a, int b) { + int result = a * b; + return result; + } + + public static void main(String[] args) { + Calculator calc = new Calculator(); + int sum = calc.add(5, 3); + System.out.println(sum); + } +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".java", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_source_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 0 + assert metrics.volume > 0 + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_go_file(): + """Test Halstead analysis on a Go file.""" + code = """ +package main + +import "fmt" + +func add(a 
int, b int) int { + return a + b +} + +func multiply(x int, y int) int { + result := x * y + return result +} + +func main() { + result := add(5, 3) + fmt.Println(result) +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".go", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_source_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 0 + assert metrics.volume > 0 + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_rust_file(): + """Test Halstead analysis on a Rust file.""" + code = """ +fn add(a: i32, b: i32) -> i32 { + a + b +} + +fn multiply(x: i32, y: i32) -> i32 { + let result = x * y; + result +} + +fn main() { + let result = add(5, 3); + println!("{}", result); +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".rs", delete=False) as f: + f.write(code) + f.flush() + + metrics = analyze_source_file(Path(f.name)) + + assert metrics is not None + assert metrics.vocabulary > 0 + assert metrics.volume > 0 + + +@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="Requires tree-sitter for multi-language support") +def test_analyze_multi_language_directory(): + """Test Halstead analysis on a directory with multiple languages.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create Python file + (tmppath / "calc.py").write_text(""" +def add(a, b): + return a + b +""") + + # Create JavaScript file + (tmppath / "calc.js").write_text(""" +function add(a, b) { + return a + b; +} +""") + + # Create Java file + (tmppath / "Calc.java").write_text(""" +public class Calc { + public int add(int a, int b) { + return a + b; + } +} +""") + + # Create Go file + (tmppath / "calc.go").write_text(""" +package main +func add(a int, b int) int { + return a + b +} +""") + + metrics = analyze_directory_halstead(tmppath) + + # Should aggregate metrics from all supported files + assert metrics is not None + 
assert metrics.volume > 0 + assert metrics.effort > 0 + assert metrics.bugs > 0 diff --git a/tests/test_analyzers/test_maintainability.py b/tests/test_analyzers/test_maintainability.py new file mode 100644 index 0000000..7d01840 --- /dev/null +++ b/tests/test_analyzers/test_maintainability.py @@ -0,0 +1,239 @@ +"""Tests for maintainability index calculator.""" + +from ossval.analyzers.maintainability import calculate_maintainability_index +from ossval.models import ( + ComplexityLevel, + ComplexityMetrics, + HalsteadMetrics, + SLOCMetrics, +) + + +def test_calculate_maintainability_with_all_metrics(): + """Test maintainability calculation with all metrics available.""" + sloc = SLOCMetrics( + total=10000, + code_lines=8000, + comment_lines=1500, + blank_lines=500, + by_language={"python": 8000}, + ) + + halstead = HalsteadMetrics( + vocabulary=50, + length=200, + calculated_length=180.0, + volume=1200.0, + difficulty=15.0, + effort=18000.0, + time_seconds=1000.0, + bugs=0.4, + ) + + complexity = ComplexityMetrics( + cyclomatic_complexity_avg=8.0, + cyclomatic_complexity_max=25, + cyclomatic_complexity_sum=800, + complexity_level=ComplexityLevel.MODERATE, + ) + + metrics = calculate_maintainability_index(sloc, halstead, complexity) + + assert metrics is not None + assert 0 <= metrics.maintainability_index <= 100 + assert metrics.maintainability_level in ["Low", "Medium", "High"] + assert 0 <= metrics.comment_ratio <= 1.0 + assert metrics.avg_complexity_per_kloc > 0 + + +def test_calculate_maintainability_without_halstead(): + """Test maintainability calculation without Halstead metrics.""" + sloc = SLOCMetrics( + total=5000, + code_lines=4000, + comment_lines=800, + blank_lines=200, + by_language={"python": 4000}, + ) + + complexity = ComplexityMetrics( + cyclomatic_complexity_avg=10.0, + complexity_level=ComplexityLevel.MODERATE, + ) + + metrics = calculate_maintainability_index(sloc, None, complexity) + + assert metrics is not None + assert 0 <= 
metrics.maintainability_index <= 100 + # Should still calculate MI using estimated Halstead volume + assert metrics.maintainability_level in ["Low", "Medium", "High"] + + +def test_calculate_maintainability_without_complexity(): + """Test maintainability calculation without complexity metrics.""" + sloc = SLOCMetrics( + total=5000, + code_lines=4000, + comment_lines=800, + blank_lines=200, + by_language={"python": 4000}, + ) + + halstead = HalsteadMetrics( + vocabulary=40, + length=150, + calculated_length=140.0, + volume=900.0, + difficulty=12.0, + effort=10800.0, + time_seconds=600.0, + bugs=0.3, + ) + + metrics = calculate_maintainability_index(sloc, halstead, None) + + assert metrics is not None + assert 0 <= metrics.maintainability_index <= 100 + # Should use default complexity + + +def test_calculate_maintainability_sloc_only(): + """Test maintainability calculation with only SLOC.""" + sloc = SLOCMetrics( + total=3000, + code_lines=2500, + comment_lines=400, + blank_lines=100, + by_language={"python": 2500}, + ) + + metrics = calculate_maintainability_index(sloc, None, None) + + assert metrics is not None + assert 0 <= metrics.maintainability_index <= 100 + # With defaults, should still produce valid MI + assert metrics.comment_ratio == 400 / 3000 + + +def test_maintainability_level_classification(): + """Test that maintainability levels are classified correctly.""" + # Test high maintainability (good code) + sloc_high = SLOCMetrics( + total=1000, + code_lines=800, + comment_lines=150, + blank_lines=50, + by_language={"python": 800}, + ) + halstead_high = HalsteadMetrics( + vocabulary=20, + length=100, + calculated_length=90.0, + volume=500.0, + difficulty=5.0, + effort=2500.0, + time_seconds=139.0, + bugs=0.17, + ) + complexity_high = ComplexityMetrics( + cyclomatic_complexity_avg=3.0, + complexity_level=ComplexityLevel.SIMPLE, + ) + + metrics_high = calculate_maintainability_index( + sloc_high, halstead_high, complexity_high + ) + assert metrics_high is 
not None + # Low complexity, good comments, small volume should give higher MI + + +def test_comment_ratio_calculation(): + """Test that comment ratio is calculated correctly.""" + sloc = SLOCMetrics( + total=10000, + code_lines=7000, + comment_lines=2000, # 20% comments + blank_lines=1000, + by_language={"python": 7000}, + ) + + metrics = calculate_maintainability_index(sloc, None, None) + + assert metrics is not None + assert metrics.comment_ratio == 0.2 # 2000/10000 + + +def test_complexity_per_kloc_calculation(): + """Test that complexity per KLOC is calculated correctly.""" + sloc = SLOCMetrics( + total=5000, + code_lines=4000, + comment_lines=800, + blank_lines=200, + by_language={"python": 4000}, + ) + + complexity = ComplexityMetrics( + cyclomatic_complexity_avg=16.0, # 16 avg complexity + complexity_level=ComplexityLevel.COMPLEX, + ) + + metrics = calculate_maintainability_index(sloc, None, complexity) + + assert metrics is not None + # (16 / 4000) * 1000 = 4.0 + assert metrics.avg_complexity_per_kloc == 4.0 + + +def test_maintainability_with_no_sloc(): + """Test that calculation returns None with no SLOC.""" + metrics = calculate_maintainability_index(None, None, None) + assert metrics is None + + +def test_maintainability_with_zero_sloc(): + """Test that calculation returns None with zero SLOC.""" + sloc = SLOCMetrics( + total=0, + code_lines=0, + comment_lines=0, + blank_lines=0, + by_language={}, + ) + + metrics = calculate_maintainability_index(sloc, None, None) + assert metrics is None + + +def test_maintainability_index_bounds(): + """Test that MI is always clamped to 0-100 range.""" + # Test with very large codebase (might produce negative MI) + sloc_large = SLOCMetrics( + total=1000000, + code_lines=900000, + comment_lines=50000, + blank_lines=50000, + by_language={"python": 900000}, + ) + + complexity_high = ComplexityMetrics( + cyclomatic_complexity_avg=50.0, + complexity_level=ComplexityLevel.VERY_COMPLEX, + ) + + halstead_high = 
HalsteadMetrics( + vocabulary=200, + length=10000, + calculated_length=9500.0, + volume=100000.0, + difficulty=80.0, + effort=8000000.0, + time_seconds=444444.0, + bugs=33.3, + ) + + metrics = calculate_maintainability_index(sloc_large, halstead_high, complexity_high) + + assert metrics is not None + # MI should be clamped to 0-100 + assert 0 <= metrics.maintainability_index <= 100 diff --git a/tests/test_cli.py b/tests/test_cli.py index 602fe55..47bed83 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -21,6 +21,25 @@ def test_cli_estimate(): assert "estimate" in result.output or result.exit_code in [0, 1] +def test_cli_estimate_with_type(): + """Test estimate command with project type.""" + runner = CliRunner() + result = runner.invoke(main, ["estimate", "--sloc", "50000", "--region", "us_sf", "--type", "compiler"]) + assert result.exit_code == 0 + assert "Estimated cost" in result.output + + +def test_cli_analyze_with_type(sample_requirements_txt): + """Test analyze command with project type override.""" + runner = CliRunner() + result = runner.invoke( + main, + ["analyze", sample_requirements_txt, "--type", "framework", "--no-clone", "--quiet"] + ) + # Should at least parse the arguments correctly + assert result.exit_code in [0, 1] + + def test_cli_formats(): """Test formats command.""" runner = CliRunner() diff --git a/tests/test_data/test_multipliers.py b/tests/test_data/test_multipliers.py index c5aca7d..d4ca122 100644 --- a/tests/test_data/test_multipliers.py +++ b/tests/test_data/test_multipliers.py @@ -1,21 +1,25 @@ """Tests for multiplier data.""" +from datetime import datetime, timedelta + from ossval.data.multipliers import ( get_complexity_multiplier, + get_halstead_multiplier, + get_maturity_multiplier, get_project_type_multiplier, ) -from ossval.models import ComplexityLevel, ProjectType +from ossval.models import ComplexityLevel, GitHistoryMetrics, HalsteadMetrics, ProjectType def test_get_project_type_multiplier(): """Test getting project 
type multipliers.""" # Library should be baseline (1.0) assert get_project_type_multiplier(ProjectType.LIBRARY) == 1.0 - + # Compiler should have higher multiplier compiler_mult = get_project_type_multiplier(ProjectType.COMPILER) assert compiler_mult > 1.0 - + # Script should have lower multiplier script_mult = get_project_type_multiplier(ProjectType.SCRIPT) assert script_mult < 1.0 @@ -25,10 +29,243 @@ def test_get_complexity_multiplier(): """Test getting complexity multipliers.""" # Moderate should be baseline (1.0) assert get_complexity_multiplier(ComplexityLevel.MODERATE) == 1.0 - + # Trivial should be lower assert get_complexity_multiplier(ComplexityLevel.TRIVIAL) < 1.0 - + # Very complex should be higher assert get_complexity_multiplier(ComplexityLevel.VERY_COMPLEX) > 1.0 + +def test_get_maturity_multiplier_none(): + """Test maturity multiplier with no git history.""" + multiplier = get_maturity_multiplier(None) + assert multiplier == 1.0 + + +def test_get_maturity_multiplier_young_project(): + """Test maturity multiplier for young project.""" + git_history = GitHistoryMetrics( + commit_count=100, + contributor_count=3, + age_days=180, # 6 months + age_years=0.5, + first_commit_date=datetime.now() - timedelta(days=180), + last_commit_date=datetime.now(), + release_count=2, + commits_per_month=20.0, + avg_files_per_commit=2.5, + high_churn_files=5, + bus_factor=2, + ) + + multiplier = get_maturity_multiplier(git_history) + + # Young project should have multiplier close to 1.0 + assert 1.0 <= multiplier <= 1.3 + + +def test_get_maturity_multiplier_mature_project(): + """Test maturity multiplier for mature project.""" + git_history = GitHistoryMetrics( + commit_count=25000, # Many commits + contributor_count=150, # Many contributors + age_days=3650, # 10 years + age_years=10.0, + first_commit_date=datetime.now() - timedelta(days=3650), + last_commit_date=datetime.now(), + release_count=50, + commits_per_month=208.0, + avg_files_per_commit=5.0, + 
high_churn_files=100, + bus_factor=5, + ) + + multiplier = get_maturity_multiplier(git_history) + + # Mature, large project should have high multiplier + assert multiplier >= 2.0 + # But capped at 2.5 + assert multiplier <= 2.5 + + +def test_get_maturity_multiplier_medium_project(): + """Test maturity multiplier for medium-sized project.""" + git_history = GitHistoryMetrics( + commit_count=2000, + contributor_count=15, + age_days=1095, # 3 years + age_years=3.0, + first_commit_date=datetime.now() - timedelta(days=1095), + last_commit_date=datetime.now(), + release_count=10, + commits_per_month=55.0, + avg_files_per_commit=3.0, + high_churn_files=20, + bus_factor=3, + ) + + multiplier = get_maturity_multiplier(git_history) + + # Medium project should have multiplier between 1.2 and 1.8 + assert 1.2 <= multiplier <= 1.8 + + +def test_get_halstead_multiplier_none(): + """Test Halstead multiplier with no metrics.""" + multiplier = get_halstead_multiplier(None) + assert multiplier == 1.0 + + +def test_get_halstead_multiplier_low_difficulty(): + """Test Halstead multiplier for low difficulty code.""" + halstead = HalsteadMetrics( + vocabulary=20, + length=50, + calculated_length=45.0, + volume=250.0, + difficulty=5.0, # Low difficulty + effort=1250.0, + time_seconds=69.4, + bugs=0.08, + ) + + multiplier = get_halstead_multiplier(halstead) + + # Low difficulty should give multiplier < 1.0 + assert 0.8 <= multiplier < 1.0 + + +def test_get_halstead_multiplier_medium_difficulty(): + """Test Halstead multiplier for medium difficulty code.""" + halstead = HalsteadMetrics( + vocabulary=50, + length=150, + calculated_length=140.0, + volume=900.0, + difficulty=15.0, # Medium difficulty + effort=13500.0, + time_seconds=750.0, + bugs=0.3, + ) + + multiplier = get_halstead_multiplier(halstead) + + # Medium difficulty should give multiplier around 1.0 + assert 0.95 <= multiplier <= 1.05 + + +def test_get_halstead_multiplier_high_difficulty(): + """Test Halstead multiplier for high 
difficulty code.""" + halstead = HalsteadMetrics( + vocabulary=100, + length=500, + calculated_length=480.0, + volume=3500.0, + difficulty=50.0, # High difficulty + effort=175000.0, + time_seconds=9722.2, + bugs=1.17, + ) + + multiplier = get_halstead_multiplier(halstead) + + # High difficulty should give multiplier > 1.5 + assert multiplier >= 1.5 + # Capped at 1.8 + assert multiplier <= 1.8 + + +def test_get_halstead_multiplier_very_high_difficulty(): + """Test Halstead multiplier for very high difficulty code.""" + halstead = HalsteadMetrics( + vocabulary=200, + length=1000, + calculated_length=950.0, + volume=10000.0, + difficulty=100.0, # Very high difficulty + effort=1000000.0, + time_seconds=55555.5, + bugs=3.33, + ) + + multiplier = get_halstead_multiplier(halstead) + + # Very high difficulty should be capped at 1.8 + assert multiplier == 1.8 + + +def test_maturity_multiplier_age_component(): + """Test that age affects maturity multiplier.""" + # Create identical git histories except for age + young_git = GitHistoryMetrics( + commit_count=1000, + contributor_count=10, + age_days=365, # 1 year + age_years=1.0, + first_commit_date=datetime.now() - timedelta(days=365), + last_commit_date=datetime.now(), + release_count=5, + commits_per_month=83.0, + avg_files_per_commit=2.0, + high_churn_files=10, + bus_factor=2, + ) + + old_git = GitHistoryMetrics( + commit_count=1000, + contributor_count=10, + age_days=3650, # 10 years + age_years=10.0, + first_commit_date=datetime.now() - timedelta(days=3650), + last_commit_date=datetime.now(), + release_count=5, + commits_per_month=8.3, + avg_files_per_commit=2.0, + high_churn_files=10, + bus_factor=2, + ) + + young_mult = get_maturity_multiplier(young_git) + old_mult = get_maturity_multiplier(old_git) + + # Older project should have higher multiplier + assert old_mult > young_mult + + +def test_maturity_multiplier_contributor_component(): + """Test that contributor count affects maturity multiplier.""" + few_contributors 
= GitHistoryMetrics( + commit_count=1000, + contributor_count=3, # Few contributors + age_days=1095, + age_years=3.0, + first_commit_date=datetime.now() - timedelta(days=1095), + last_commit_date=datetime.now(), + release_count=5, + commits_per_month=30.0, + avg_files_per_commit=2.0, + high_churn_files=10, + bus_factor=1, + ) + + many_contributors = GitHistoryMetrics( + commit_count=1000, + contributor_count=200, # Many contributors + age_days=1095, + age_years=3.0, + first_commit_date=datetime.now() - timedelta(days=1095), + last_commit_date=datetime.now(), + release_count=5, + commits_per_month=30.0, + avg_files_per_commit=2.0, + high_churn_files=10, + bus_factor=10, + ) + + few_mult = get_maturity_multiplier(few_contributors) + many_mult = get_maturity_multiplier(many_contributors) + + # More contributors should mean higher complexity/multiplier + assert many_mult > few_mult + diff --git a/tests/test_estimators/test_cocomo.py b/tests/test_estimators/test_cocomo.py index 7a84ffa..cb33955 100644 --- a/tests/test_estimators/test_cocomo.py +++ b/tests/test_estimators/test_cocomo.py @@ -1,16 +1,23 @@ """Tests for COCOMO II estimator.""" +from datetime import datetime, timedelta + from ossval.estimators.cocomo import COCOMO2Estimator from ossval.models import ( ComplexityLevel, - ComplexityMetrics, Package, Region, SLOCMetrics + ComplexityMetrics, + GitHistoryMetrics, + HalsteadMetrics, + Package, + Region, + SLOCMetrics, ) def test_cocomo_estimate_basic(): """Test basic COCOMO II estimation.""" estimator = COCOMO2Estimator() - + package = Package( name="test", sloc=SLOCMetrics( @@ -25,32 +32,37 @@ def test_cocomo_estimate_basic(): complexity_level=ComplexityLevel.MODERATE, ), ) - + result = estimator.estimate(package, Region.US_SF) - + assert result.cost_usd > 0 assert result.effort_person_months > 0 assert result.methodology == "COCOMO II" assert result.region == Region.US_SF assert result.cost_usd_low < result.cost_usd < result.cost_usd_high + # New multipliers 
should default to 1.0 when no git/halstead data + assert result.maturity_multiplier == 1.0 + assert result.halstead_multiplier == 1.0 def test_cocomo_zero_sloc(): """Test COCOMO with zero SLOC.""" estimator = COCOMO2Estimator() - + package = Package(name="test", sloc=None) - + result = estimator.estimate(package, Region.US_SF) - + assert result.cost_usd == 0 assert result.effort_person_months == 0 + assert result.maturity_multiplier == 1.0 + assert result.halstead_multiplier == 1.0 def test_cocomo_custom_parameters(): """Test COCOMO with custom parameters.""" estimator = COCOMO2Estimator(a=3.0, b=1.1, eaf=1.2) - + package = Package( name="test", sloc=SLOCMetrics( @@ -61,7 +73,214 @@ def test_cocomo_custom_parameters(): by_language={"python": 8000}, ), ) - + result = estimator.estimate(package, Region.US_SF) assert result.cost_usd > 0 + +def test_cocomo_with_git_history(): + """Test COCOMO estimation with git history metrics.""" + estimator = COCOMO2Estimator() + + git_history = GitHistoryMetrics( + commit_count=10000, + contributor_count=50, + age_days=1825, # 5 years + age_years=5.0, + first_commit_date=datetime.now() - timedelta(days=1825), + last_commit_date=datetime.now(), + release_count=20, + commits_per_month=166.0, + avg_files_per_commit=3.0, + high_churn_files=30, + bus_factor=3, + ) + + package = Package( + name="test", + sloc=SLOCMetrics( + total=10000, + code_lines=8000, + comment_lines=1500, + blank_lines=500, + by_language={"python": 8000}, + ), + git_history=git_history, + ) + + result = estimator.estimate(package, Region.US_SF) + + assert result.cost_usd > 0 + # Mature project should have maturity multiplier > 1.0 + assert result.maturity_multiplier > 1.0 + + +def test_cocomo_with_halstead_metrics(): + """Test COCOMO estimation with Halstead metrics.""" + estimator = COCOMO2Estimator() + + halstead = HalsteadMetrics( + vocabulary=100, + length=500, + calculated_length=480.0, + volume=3500.0, + difficulty=30.0, # High difficulty + effort=105000.0, + 
time_seconds=5833.3, + bugs=1.17, + ) + + package = Package( + name="test", + sloc=SLOCMetrics( + total=10000, + code_lines=8000, + comment_lines=1500, + blank_lines=500, + by_language={"python": 8000}, + ), + halstead=halstead, + ) + + result = estimator.estimate(package, Region.US_SF) + + assert result.cost_usd > 0 + # High difficulty should increase multiplier + assert result.halstead_multiplier > 1.0 + + +def test_cocomo_with_all_metrics(): + """Test COCOMO estimation with all new metrics.""" + estimator = COCOMO2Estimator() + + git_history = GitHistoryMetrics( + commit_count=20000, + contributor_count=150, + age_days=3650, # 10 years + age_years=10.0, + first_commit_date=datetime.now() - timedelta(days=3650), + last_commit_date=datetime.now(), + release_count=50, + commits_per_month=166.0, + avg_files_per_commit=5.0, + high_churn_files=100, + bus_factor=5, + ) + + halstead = HalsteadMetrics( + vocabulary=150, + length=800, + calculated_length=750.0, + volume=6000.0, + difficulty=40.0, + effort=240000.0, + time_seconds=13333.3, + bugs=2.0, + ) + + package = Package( + name="test", + sloc=SLOCMetrics( + total=50000, + code_lines=40000, + comment_lines=8000, + blank_lines=2000, + by_language={"python": 40000}, + ), + complexity=ComplexityMetrics( + cyclomatic_complexity_avg=15.0, + complexity_level=ComplexityLevel.COMPLEX, + ), + git_history=git_history, + halstead=halstead, + ) + + result_with_metrics = estimator.estimate(package, Region.US_SF) + + # Create same package without git history and halstead + package_basic = Package( + name="test", + sloc=SLOCMetrics( + total=50000, + code_lines=40000, + comment_lines=8000, + blank_lines=2000, + by_language={"python": 40000}, + ), + complexity=ComplexityMetrics( + cyclomatic_complexity_avg=15.0, + complexity_level=ComplexityLevel.COMPLEX, + ), + ) + + result_basic = estimator.estimate(package_basic, Region.US_SF) + + # Estimate with all metrics should be significantly higher + assert result_with_metrics.cost_usd > 
result_basic.cost_usd + assert result_with_metrics.maturity_multiplier > 1.5 + assert result_with_metrics.halstead_multiplier > 1.0 + # Total effort should reflect all multipliers + assert result_with_metrics.effort_person_months > result_basic.effort_person_months + + +def test_cocomo_confidence_increases_with_metrics(): + """Test that confidence increases when more metrics are available.""" + estimator = COCOMO2Estimator() + + # Package with only SLOC + package_minimal = Package( + name="test", + sloc=SLOCMetrics( + total=10000, + code_lines=8000, + comment_lines=1500, + blank_lines=500, + by_language={"python": 8000}, + ), + ) + + # Package with all metrics + package_complete = Package( + name="test", + sloc=SLOCMetrics( + total=10000, + code_lines=8000, + comment_lines=1500, + blank_lines=500, + by_language={"python": 8000}, + ), + complexity=ComplexityMetrics( + cyclomatic_complexity_avg=10.0, + complexity_level=ComplexityLevel.MODERATE, + ), + halstead=HalsteadMetrics( + vocabulary=50, + length=150, + calculated_length=140.0, + volume=900.0, + difficulty=15.0, + effort=13500.0, + time_seconds=750.0, + bugs=0.3, + ), + git_history=GitHistoryMetrics( + commit_count=1000, + contributor_count=10, + age_days=730, + age_years=2.0, + first_commit_date=datetime.now() - timedelta(days=730), + last_commit_date=datetime.now(), + release_count=10, + commits_per_month=41.0, + avg_files_per_commit=2.0, + high_churn_files=10, + bus_factor=2, + ), + ) + + result_minimal = estimator.estimate(package_minimal, Region.US_SF) + result_complete = estimator.estimate(package_complete, Region.US_SF) + + # Confidence should be higher with more data + assert result_complete.confidence > result_minimal.confidence + diff --git a/tests/test_integration_e2e.py b/tests/test_integration_e2e.py new file mode 100644 index 0000000..3c2ff17 --- /dev/null +++ b/tests/test_integration_e2e.py @@ -0,0 +1,329 @@ +"""End-to-end integration tests for the complete analysis pipeline.""" + +import asyncio 
+import subprocess +import tempfile +from pathlib import Path + +import pytest + +from ossval.core import analyze +from ossval.models import AnalysisConfig, Package, ProjectType, Region + + +def create_test_project_with_git(path: Path): + """Create a test Python project with git history.""" + # Initialize git + subprocess.run(["git", "init"], cwd=path, check=True, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@example.com"], + cwd=path, + check=True, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test User"], + cwd=path, + check=True, + capture_output=True, + ) + + # Create Python files with varying complexity + (path / "simple.py").write_text(""" +def add(a, b): + return a + b + +def subtract(a, b): + return a - b +""") + + (path / "moderate.py").write_text(""" +class Calculator: + def __init__(self): + self.history = [] + + def calculate(self, operation, a, b): + if operation == "add": + result = a + b + elif operation == "subtract": + result = a - b + elif operation == "multiply": + result = a * b + elif operation == "divide": + if b == 0: + raise ValueError("Cannot divide by zero") + result = a / b + else: + raise ValueError("Unknown operation") + + self.history.append((operation, a, b, result)) + return result + + def get_history(self): + return self.history +""") + + (path / "complex.py").write_text(""" +def complex_function(data, options=None): + ''' + A complex function with multiple branches and loops. 
+ ''' + if options is None: + options = {} + + result = [] + for item in data: + if isinstance(item, dict): + for key, value in item.items(): + if key in options: + if options[key] == "transform": + value = str(value).upper() + elif options[key] == "filter": + if value > 10: + result.append(value) + elif options[key] == "compute": + try: + value = value * 2 + 1 + result.append(value) + except TypeError: + continue + else: + result.append(value) + elif isinstance(item, (list, tuple)): + for sub_item in item: + if sub_item not in result: + result.append(sub_item) + else: + result.append(item) + + return result +""") + + # Commit files + subprocess.run(["git", "add", "."], cwd=path, check=True, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "Initial commit"], + cwd=path, + check=True, + capture_output=True, + ) + + # Create a tag + subprocess.run( + ["git", "tag", "v1.0.0"], cwd=path, check=True, capture_output=True + ) + + # Make more commits + (path / "simple.py").write_text(""" +def add(a, b): + '''Add two numbers.''' + return a + b + +def subtract(a, b): + '''Subtract b from a.''' + return a - b + +def multiply(a, b): + '''Multiply two numbers.''' + return a * b +""") + + subprocess.run(["git", "add", "."], cwd=path, check=True, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "Add multiply function"], + cwd=path, + check=True, + capture_output=True, + ) + + +@pytest.mark.asyncio +async def test_e2e_analysis_with_all_metrics(): + """Test end-to-end analysis with all metrics (Halstead, git history, maintainability).""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + project_path = tmppath / "test_project" + project_path.mkdir() + + # Create test project with git history + create_test_project_with_git(project_path) + + # Create a package pointing to this repository + package = Package( + name="test-project", + version="1.0.0", + ecosystem="pypi", + repository_url=str(project_path), + 
project_type=ProjectType.LIBRARY, + ) + + # Analyze with cloning enabled + config = AnalysisConfig( + clone_repos=True, + use_cache=False, # Don't cache in tests + region=Region.GLOBAL_AVERAGE, + methodology="cocomo2", + ) + + result = await analyze([package], config) + + assert result is not None + assert len(result.packages) == 1 + + analyzed_package = result.packages[0] + + # Verify SLOC was analyzed + assert analyzed_package.sloc is not None + assert analyzed_package.sloc.total > 0 + assert analyzed_package.sloc.code_lines > 0 + + # Note: Halstead and git history may not be analyzed for local paths + # without proper cloning setup. This is expected behavior. + # For a true end-to-end test with real cloning, we'd need a real git URL. + + # Verify maintainability index was calculated (requires SLOC) + if analyzed_package.sloc: + assert analyzed_package.maintainability is not None + assert 0 <= analyzed_package.maintainability.maintainability_index <= 100 + assert analyzed_package.maintainability.maintainability_level in [ + "Low", + "Medium", + "High", + ] + + # Verify cost estimate + assert analyzed_package.cost_estimate is not None + assert analyzed_package.cost_estimate.cost_usd > 0 + assert analyzed_package.cost_estimate.maturity_multiplier >= 1.0 + assert analyzed_package.cost_estimate.halstead_multiplier >= 0.8 + assert analyzed_package.cost_estimate.halstead_multiplier <= 1.8 + + +@pytest.mark.asyncio +async def test_e2e_analysis_comparison_with_and_without_git(): + """Test that analysis with git history produces higher estimates than without.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + project_path = tmppath / "test_project" + project_path.mkdir() + + # Create test project with extensive git history + create_test_project_with_git(project_path) + + # Add many more commits to simulate mature project + for i in range(20): + (project_path / f"file{i}.py").write_text(f"# File {i}\nvalue = {i}") + subprocess.run( + ["git", 
"add", "."], cwd=project_path, check=True, capture_output=True + ) + subprocess.run( + ["git", "commit", "-m", f"Add file {i}"], + cwd=project_path, + check=True, + capture_output=True, + ) + + package = Package( + name="test-project", + version="1.0.0", + ecosystem="pypi", + repository_url=str(project_path), + project_type=ProjectType.LIBRARY, + ) + + config = AnalysisConfig( + clone_repos=True, + use_cache=False, + region=Region.GLOBAL_AVERAGE, + ) + + result = await analyze([package], config) + + analyzed = result.packages[0] + + # Note: git history may not be available for local paths without proper cloning + # If it is available, verify maturity multiplier + if analyzed.git_history: + assert analyzed.cost_estimate.maturity_multiplier > 1.0 + + # Cost should be calculated + assert analyzed.cost_estimate is not None + assert analyzed.cost_estimate.cost_usd > 0 + + +@pytest.mark.asyncio +async def test_e2e_summary_statistics(): + """Test that summary statistics include all metrics.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + + # Create two test projects + for proj_num in range(2): + project_path = tmppath / f"project_{proj_num}" + project_path.mkdir() + create_test_project_with_git(project_path) + + packages = [ + Package( + name=f"project-{i}", + version="1.0.0", + repository_url=str(tmppath / f"project_{i}"), + project_type=ProjectType.LIBRARY, + ) + for i in range(2) + ] + + config = AnalysisConfig( + clone_repos=True, + use_cache=False, + region=Region.GLOBAL_AVERAGE, + ) + + result = await analyze(packages, config) + + # Verify summary statistics + assert result.summary["total_packages"] == 2 + assert result.summary["analyzed_packages"] >= 1 + assert result.summary["total_sloc"] > 0 + assert result.summary["total_cost_usd"] > 0 + assert result.summary["total_effort_person_months"] > 0 + assert result.summary["total_effort_person_years"] > 0 + + # Verify all packages were analyzed + for package in result.packages: + if 
package.cost_estimate: + # Should have all new metrics if analysis succeeded + assert hasattr(package.cost_estimate, "maturity_multiplier") + assert hasattr(package.cost_estimate, "halstead_multiplier") + + +@pytest.mark.asyncio +async def test_e2e_without_cloning(): + """Test that analysis works without cloning (no git/Halstead metrics).""" + package = Package( + name="test-package", + version="1.0.0", + ecosystem="pypi", + project_type=ProjectType.LIBRARY, + ) + + config = AnalysisConfig( + clone_repos=False, # Don't clone + use_cache=False, + region=Region.GLOBAL_AVERAGE, + ) + + result = await analyze([package], config) + + analyzed = result.packages[0] + + # Without cloning, should not have git/Halstead metrics + assert analyzed.git_history is None + assert analyzed.halstead is None + assert analyzed.maintainability is None + + # But cost estimate should still work with defaults + # (Package has no SLOC, so cost will be 0, but multipliers should be 1.0) + if analyzed.cost_estimate: + assert analyzed.cost_estimate.maturity_multiplier == 1.0 + assert analyzed.cost_estimate.halstead_multiplier == 1.0