From d985f7e7b0a9fa5596ca510fd3239fdd8331a6fc Mon Sep 17 00:00:00 2001 From: oscarvalenzuelab Date: Mon, 8 Dec 2025 23:25:25 -0800 Subject: [PATCH 1/2] Fix URL security vulnerabilities in string matching Security issue: String matching for domains and protocols at arbitrary positions in URLs could be exploited with malicious URLs like: - evil.com/git+malicious (matches "git+" check) - evil-github.com-fake.ru (matches "github.com" check) Changes: 1. src/ossval/parsers/spdx.py: - Add _is_git_url() helper method for safe URL validation - Properly parse URLs and check domain via urlparse.netloc - Check protocol prefixes with startswith() instead of 'in' - Fix 3 vulnerable checks in JSON and tag-value parsing 2. src/ossval/analyzers/repo_finder.py: - Fix _is_valid_git_url() to parse URLs and check netloc - Fix _normalize_git_url() to safely check domains - Use urlparse to extract and verify domain names - Check protocol prefixes with startswith() 3. src/ossval/core.py: - Fix GitHub health check to parse URL and validate netloc - Replace unsafe 'in' check with urlparse validation - Only analyze health for actual github.com domain Security improvements: - All domain checks now use urlparse().netloc == "domain.com" - Protocol checks use startswith() instead of substring matching - Prevents bypasses via domain name in path or subdomain - All tests passing (94 tests) --- src/ossval/analyzers/repo_finder.py | 37 ++++++++++++------- src/ossval/core.py | 14 +++++--- src/ossval/parsers/spdx.py | 56 ++++++++++++++++++++++------- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/src/ossval/analyzers/repo_finder.py b/src/ossval/analyzers/repo_finder.py index fa4ac92..0d927c2 100644 --- a/src/ossval/analyzers/repo_finder.py +++ b/src/ossval/analyzers/repo_finder.py @@ -108,17 +108,25 @@ def _is_valid_git_url(url: str) -> bool: """Check if URL looks like a valid git repository.""" if not url: return False + url_lower = url.lower() - # Check for common git hosting 
platforms (with or without trailing slash) - return ( - "github.com" in url_lower - or "gitlab.com" in url_lower - or "bitbucket.org" in url_lower - or url_lower.startswith("git+") - or url_lower.endswith(".git") - or url_lower.endswith(".git/") - or url_lower.startswith("git://") - ) + + # Check for git protocol prefixes + if url_lower.startswith(("git+", "git://", "git@")): + return True + + # Check for .git suffix + if url_lower.endswith(".git") or url_lower.endswith(".git/"): + return True + + # Check for common git hosting platforms by parsing domain + try: + # Add protocol if missing for parsing + parse_url = url_lower if "://" in url_lower else f"https://{url_lower}" + parsed = urlparse(parse_url) + return parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"] + except Exception: + return False def _normalize_git_url(url: str) -> str: @@ -149,8 +157,13 @@ def _normalize_git_url(url: str) -> str: # Ensure it starts with http:// or https:// if not url.startswith(("http://", "https://")): - if "github.com" in url or "gitlab.com" in url or "bitbucket.org" in url: - url = f"https://{url}" + # Parse URL with dummy protocol to check domain safely + try: + parsed = urlparse(f"https://{url}") + if parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"]: + url = f"https://{url}" + except Exception: + pass # Add .git suffix if it's a GitHub/GitLab/Bitbucket URL try: diff --git a/src/ossval/core.py b/src/ossval/core.py index d9269c5..c108ac8 100644 --- a/src/ossval/core.py +++ b/src/ossval/core.py @@ -4,6 +4,7 @@ from datetime import datetime from pathlib import Path from typing import List, Optional +from urllib.parse import urlparse from ossval import __version__ from ossval.analyzers import ( @@ -179,10 +180,15 @@ async def _analyze_package( package.warnings.append(f"Error calculating maintainability index: {str(e)}") # Analyze health metrics (GitHub only) - if package.repository_url and "github.com" in package.repository_url.lower(): - health = await 
analyze_health(package.repository_url, config.github_token) - if health: - package.health = health + if package.repository_url: + try: + parsed = urlparse(package.repository_url.lower()) + if parsed.netloc == "github.com": + health = await analyze_health(package.repository_url, config.github_token) + if health: + package.health = health + except Exception: + pass return package diff --git a/src/ossval/parsers/spdx.py b/src/ossval/parsers/spdx.py index c605ad2..6538e13 100644 --- a/src/ossval/parsers/spdx.py +++ b/src/ossval/parsers/spdx.py @@ -4,6 +4,7 @@ import re from pathlib import Path from typing import List +from urllib.parse import urlparse from ossval.models import Package, SourceType from ossval.parsers.base import BaseParser, ParseResult @@ -32,6 +33,39 @@ def can_parse(self, filepath: str) -> bool: return False return False + @staticmethod + def _is_git_url(url: str) -> bool: + """ + Safely check if a URL is a git repository URL. + + Checks: + - Protocol starts with git+ (e.g., git+https://) + - Domain is github.com, gitlab.com, or bitbucket.org + + Args: + url: URL string to check + + Returns: + True if it's a git URL, False otherwise + """ + if not url or url == "NOASSERTION": + return False + + # Check if it starts with git+ protocol + if url.startswith("git+"): + return True + + # Parse URL and check domain + try: + # Handle git@ SSH URLs + if url.startswith("git@"): + return True + + parsed = urlparse(url if "://" in url else f"https://{url}") + return parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"] + except Exception: + return False + def parse(self, filepath: str) -> ParseResult: """Parse SPDX SBOM file.""" path = Path(filepath) @@ -86,10 +120,9 @@ def _parse_json(self, filepath: str) -> List[Package]: # Extract download location as fallback if not repository_url: download_location = pkg_data.get("downloadLocation", "") - if download_location and download_location != "NOASSERTION": + if self._is_git_url(download_location): # Try to 
extract git URL - if "git+" in download_location or "github.com" in download_location: - repository_url = download_location.replace("git+", "").split("#")[0] + repository_url = download_location.replace("git+", "").split("#")[0] package = Package( name=name, @@ -133,18 +166,17 @@ def _parse_tag_value(self, filepath: str) -> List[Package]: ecosystem = self._extract_ecosystem_from_purl(f"pkg:{purl}") if ecosystem: current_package["ecosystem"] = ecosystem - elif "git+" in ref_line or "github.com" in ref_line: - # Extract repository URL - url_match = re.search(r"(https?://[^\s]+)", ref_line) - if url_match: + else: + # Try to extract repository URL + url_match = re.search(r"(https?://[^\s]+|git\+[^\s]+)", ref_line) + if url_match and self._is_git_url(url_match.group(1)): current_package["repository_url"] = url_match.group(1) elif line.startswith("PackageDownloadLocation:") and in_package: download_loc = line.split(":", 1)[1].strip() - if download_loc and download_loc != "NOASSERTION": - if "git+" in download_loc or "github.com" in download_loc: - current_package["repository_url"] = ( - download_loc.replace("git+", "").split("#")[0] - ) + if self._is_git_url(download_loc): + current_package["repository_url"] = ( + download_loc.replace("git+", "").split("#")[0] + ) # Don't forget the last package if in_package and current_package.get("name"): From d60bf0a20fde696e8f6ffaa888421a0deac0a009 Mon Sep 17 00:00:00 2001 From: oscarvalenzuelab Date: Mon, 8 Dec 2025 23:28:13 -0800 Subject: [PATCH 2/2] Bump version to 1.2.2 for security release - Update version in pyproject.toml and __init__.py - Add 1.2.2 release notes to CHANGELOG.md - Document security fixes for URL validation vulnerabilities --- CHANGELOG.md | 15 ++++++++++++++- pyproject.toml | 2 +- src/ossval/__init__.py | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd69855..6282588 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres 
to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.2.2] - 2025-12-08 + +### Security +- **Fixed URL security vulnerabilities** in string matching patterns + - Fixed domain/protocol substring matching at arbitrary positions in a URL, which could be exploited + - Prevented URL spoofing attacks via malicious URLs (e.g., `evil-github.com-fake.ru`) + - Replaced unsafe substring checks with proper URL parsing using `urlparse()` + - All domain checks now validate that `netloc` exactly matches the expected domain + - Protocol checks use `startswith()` instead of substring matching +- Affected files: `src/ossval/parsers/spdx.py`, `src/ossval/analyzers/repo_finder.py`, `src/ossval/core.py` +- Severity: Medium - Could allow URL spoofing/bypass in repository URL validation + ## [1.2.1] - 2025-12-08 ### Added @@ -158,6 +170,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Efficient SLOC counting with pygount - Optimized repository URL discovery -[Unreleased]: https://github.com/SemClone/ossval/compare/v1.2.1...HEAD +[Unreleased]: https://github.com/SemClone/ossval/compare/v1.2.2...HEAD +[1.2.2]: https://github.com/SemClone/ossval/compare/v1.2.1...v1.2.2 [1.2.1]: https://github.com/SemClone/ossval/compare/v1.0.1...v1.2.1 [1.0.1]: https://github.com/SemClone/ossval/releases/tag/v1.0.1 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index d5479de..c54071e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ossval" -version = "1.2.1" +version = "1.2.2" description = "Open Source Software Valuation - Calculate development cost savings from OSS dependencies" readme = "README.md" requires-python = ">=3.10" diff --git a/src/ossval/__init__.py b/src/ossval/__init__.py index a71abc0..34e9e62 100644 --- a/src/ossval/__init__.py +++ b/src/ossval/__init__.py @@ -1,6 +1,6 @@ """OSSVAL: Open Source Software Valuation Tool.""" -__version__ = "1.2.1" +__version__ = 
"1.2.2" from ossval.core import analyze, parse_sbom, quick_estimate from ossval.models import AnalysisConfig, AnalysisResult, Region, ProjectType