diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..7065ff3 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(ls -lh c:/repository/manifest-parser/test/resources/*.gradle* c:/repository/manifest-parser/test/resources/gradle.properties c:/repository/manifest-parser/test/resources/gradle/)" + ] + } +} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 299461f..da79c78 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,9 +16,11 @@ on: jobs: tag-and-release: runs-on: ubuntu-latest + permissions: + contents: write steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # need full history for tags @@ -59,7 +61,7 @@ jobs: git push origin "${{ env.new_version }}" - name: Create GitHub Release - uses: actions/create-release@v1 + uses: actions/create-release@0cb9c9b65d5d1901c1f53e5e66eaf4afd303e70e # v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6b5d809 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,262 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET, Gradle, SBT) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans — so the `Locations` field is part of the public contract, not a debugging convenience. + +**Status:** Active / maintained. Part of the Checkmarx One SCA pipeline. + +## Technology Stack + +| Component | Details | +|-----------|---------| +| Language | Go 1.23.0 / toolchain go1.24.2 | +| Test framework | `github.com/stretchr/testify v1.8.4` | +| Go module parsing | `golang.org/x/mod v0.24.0` | +| XML parsing | stdlib `encoding/xml` | +| JSON output | stdlib `encoding/json` | +| Database | None | +| Web framework | None | +| Dependencies | Vendored (`vendor/`) — no `go mod download` required | + +## Repository Structure + +``` +cmd/ CLI entry point (main.go) +pkg/parser/ Public API + ├── parser.go Parser interface + ├── parser_factory.go ParsersFactory — sole public entry point + ├── manifest-file-selector.go Filename → Manifest enum mapping + └── models/ + └── package_model.go Package / Location structs +internal/parsers/ Per-ecosystem implementations (not importable by callers) + ├── dotnet/ + ├── golang/ + ├── gradle/ + ├── maven/ + ├── npm/ + ├── poetry/ + ├── pypi/ + ├── sbt/ + └── setuptools/ +internal/testdata/ Parser-specific test fixtures (sbt, Python, .NET) +test/resources/ Shared fixture files for all parser tests +vendor/ Vendored dependencies +.github/workflows/ci.yml CI pipeline (test + 60% coverage gate) +``` + +## Development Setup + +**Prerequisites:** Go ≥ 1.23, git. No other tools required — dependencies are vendored. + +```bash +git clone https://github.com/Checkmarx/manifest-parser.git +cd manifest-parser + +go test ./... # run all tests +go test ./internal/parsers/gradle/... # run tests for a single parser +go test -run TestName ./path/... # run a single test by name +go test ./... -coverprofile cover.out # CI gate: total coverage must be >= 60% +go tool cover -html cover.out # view coverage report in browser +go build -o manifest-parser ./cmd # build CLI +go run ./cmd test/resources/pom.xml # run CLI against a fixture +``` + +**Sample output** from `go run ./cmd test/resources/pom.xml`: + +```json +[ + { + "PackageManager": "mvn", + "PackageName": "junit:junit", + "Version": "4.13.2", + "FilePath": "test/resources/pom.xml", + "Locations": [{ "Line": 14, "StartIndex": 4, "EndIndex": 20 }] + } +] +``` + +## API / Interfaces + +The public API lives entirely under `pkg/`: + +**`Parser` interface** ([pkg/parser/parser.go](pkg/parser/parser.go)): +```go +type Parser interface { + Parse(manifestFile string) ([]models.Package, error) +} +``` + +**`ParsersFactory`** ([pkg/parser/parser_factory.go](pkg/parser/parser_factory.go)) — the **only** public entry point. Returns a concrete `Parser` for the given filename, or `nil` for unsupported files. + +**`Package` / `Location` structs** ([pkg/parser/models/package_model.go](pkg/parser/models/package_model.go)): +```go +type Package struct { + PackageManager string + PackageName string + Version string + FilePath string + Locations []Location +} + +type Location struct { + Line int // 0-based (all parsers) + StartIndex int // 0-based byte offset from start of line + EndIndex int // 0-based byte offset from start of line +} +``` + +Adding a new ecosystem: edit `manifest-file-selector.go`, add a case in `parser_factory.go`, and add a package under `internal/parsers/`. + +## Architecture + +The module is organized around one interface and a dispatching factory: + +- [pkg/parser/manifest-file-selector.go](pkg/parser/manifest-file-selector.go) — maps filename/extension to a `Manifest` enum. +- [pkg/parser/parser_factory.go](pkg/parser/parser_factory.go) — dispatches to the right concrete parser. +- [pkg/parser/models/package_model.go](pkg/parser/models/package_model.go) — `Locations` is a slice: Maven returns one entry per line of a multi-line `` block; most others return a single entry. + +Per-ecosystem parsers live under [internal/parsers/](internal/parsers/): + +- `gradle/` — parses `build.gradle` / `build.gradle.kts` (Groovy + Kotlin DSL) and `gradle/libs.versions.toml` version catalogs. Resolves variables from `gradle.properties` and `ext {}` blocks. `PackageManager` = `"gradle"`. +- `maven/` — parses `pom.xml` with `encoding/xml`, then re-scans the raw text to locate each `` block line by line. Resolves `${property}` vars from `` and falls back to `` for empty/ranged versions. Only **direct** `` are emitted (managed-only deps are intentionally skipped to avoid duplicates — see PR #15). `PackageManager` = `"mvn"`. +- `npm/` — parses `package.json` plus, if present as a sibling file, `package-lock.json` (v1 and v2/v3 formats). Ranged specifiers (`^`, `~`, `*`, `>`, `<`) trigger a lookup in the lockfile. Without a lock match, ranged versions resolve to `"latest"`. `PackageManager` = `"npm"`. +- `poetry/` — parses `pyproject.toml` (Poetry 1.x key-value format and PEP 621 array format) and resolves exact versions from a sibling `poetry.lock` if present. Supports exact versions, ranges (`^`, `~`, `>=`, `<=`), wildcards (`*`, `1.2.*`), inline tables, optional packages, dependency groups, and markers. Ranged/wildcard versions without a lock match resolve to `"latest"`. `PackageManager` = `"pypi"` (Poetry packages are PyPI packages). +- `pypi/` — line-oriented scan of `requirements*.txt`, `requirement*.txt`, `constraints*.txt`, and `packages*.txt`. Supports six Python dependency formats: pip, pip-freeze, pip-compile, pip-tools, uv export, and Poetry export. Features: line continuations (`\`), `--hash=` stripping, pip CLI option skipping (`-i`, `-r`, `-c`, `-e`, etc.), VCS requirements (`git+`, `hg+`, `svn+`, `bzr+` with `#egg=`), URL requirements (PEP 508 `pkg @ URL`), `===` arbitrary equality, and environment markers (`;`). `PackageManager` = `"pypi"`. +- `setuptools/` — two parsers for Python packaging manifests: `setup_cfg_parser.go` (`setup.cfg` INI format) and `setup_py_parser.go` (`setup.py` script). Both support `install_requires`, `setup_requires`, `tests_require`, and `extras_require`. Duplicate packages across sections are stored as separate entries with distinct line numbers. `PackageManager` = `"pypi"`. +- `golang/` — uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets. `PackageManager` = `"go"`. +- `dotnet/` — three parsers: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Bracketed version ranges become `"latest"`. `PackageManager` = `"nuget"` for all three. +- `sbt/` — parses any `.sbt` file (`build.sbt`, `plugins.sbt`, `dependencies.sbt`, etc.) using line-oriented scanning. Supports `val`/`lazy val`/`def` variable declarations, all SBT dependency operators (`%`, `%%`, `%%%`), `Seq(...)` blocks, `addSbtPlugin(...)` syntax, dependency modifiers (`exclude`, `excludeAll`, `intransitive`, `withSources`, `classifier`, `cross`), block and inline comments, scope annotations, and duplicate detection. `PackageManager` = `"sbt"`. + +## Project Rules (Invariants) + +- **`Location.Line` MUST be 0-based for ALL parsers.** When iterating `for i, line := range lines`, emit `Line: i` — never `i + 1`. Editors display 1-based line numbers; downstream consumers add `+1` for display. If parser output matches the editor's line number, it's off-by-one. +- **`Location.StartIndex` / `EndIndex` are 0-based byte offsets** from the start of the line. They are byte offsets, not rune/character offsets — relevant for non-ASCII manifests. +- **Unresolvable or ranged versions resolve to the literal string `"latest"`**, never an empty string. Callers branch on this value. +- **`PackageManager` strings are part of the contract**: `"gradle"`, `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"`, `"sbt"`. Don't rename them. +- Maven emits one `Location` per **non-comment line** of the `` block (open tag, each child element, close tag). Single-line `Locations` for Maven would be a regression. +- Do not add `ParsersFactory` overloads or alternative entry points without coordinating with AST-CLI. +- **Do not modify or rename existing `PackageManager` strings**. AST-CLI and Checkmarx One SCA branch on these values — a silent rename breaks downstream parsing with no compile-time error. If a rename is genuinely required, stop and confirm with the user. +- All Python parsers (`pypi/`, `poetry/`, `setuptools/`) return `PackageManager` = `"pypi"` because all Python packages ultimately live on PyPI regardless of the tool that declared them. Do not introduce separate strings like `"poetry"` or `"setuptools"`. + +## Testing Strategy + +Each parser has a `*_test.go` co-located with it using `testify`. Fixtures are split across two locations: + +**Shared fixtures** in [test/resources/](test/resources/): +``` +test/resources/ +├── build.gradle Groovy DSL +├── build.gradle.kts Kotlin DSL +├── gradle/libs.versions.toml Version catalog (80+ entries) +├── gradle.properties Centralized Gradle properties +├── pom.xml Maven +├── package.json npm +├── test_go.mod Go modules +├── Bootstrap.csproj .NET csproj +├── Gateway.csproj .NET csproj (variant) +├── Directory.Packages.props .NET centralized packages +├── packages.config .NET legacy NuGet +└── requirements.txt Python pip (basic format) +``` + +**Parser-specific fixtures** in [internal/testdata/](internal/testdata/): +``` +internal/testdata/ +├── build.sbt SBT build file (Log4Shell, Struts2, etc.) +├── plugins.sbt SBT plugin dependencies +├── pyproject.toml Poetry project configuration (requests, flask, pytest, numpy, pandas) +├── setup.cfg Setuptools INI format (requests, flask, six, pytest, black) +├── setup.py Setuptools Python script (same deps as setup.cfg) +└── ast-visual-studio-extension.csproj .NET multi-package csproj +``` + +**PyPI-format fixtures** in [internal/parsers/pypi/testdata/](internal/parsers/pypi/testdata/): +``` +internal/parsers/pypi/testdata/ +├── requirements-pip-freeze.txt pip freeze output (exact pinned versions) +├── requirements-pip-compile.txt pip-compile output with via comments +└── requirements-uv-export.txt uv export with --hash options and line continuations +``` + +When adding behaviours, add a fixture here rather than embedding large manifests in test source. + +CI ([.github/workflows/ci.yml](.github/workflows/ci.yml)) enforces a **60% total coverage floor** — adding an untested branch to an already-thin package can push the whole repo below the gate. View coverage locally with `go tool cover -html cover.out`. + +Expected pattern for a new parser: fixture file under `test/resources/` or `internal/testdata/` + `_parser_test.go` co-located with the parser, using `testify` assertions on `PackageName`, `Version`, `PackageManager`, and `Locations`. + +## Known Issues / Limitations + +- **pypi**: VCS requirements (`git+`, `hg+`, `svn+`, `bzr+`) require an `#egg=` fragment to extract the package name; VCS URLs without `#egg=` are skipped. URL requirements must use PEP 508 `pkg @ URL` syntax with the package name before `@`. +- **poetry**: Multi-line dependency tables spanning more than one line (e.g., `{git = "...", rev = "..."}` across lines) are not fully parsed — the dependency is skipped. Single-line inline tables are supported. +- **npm**: Ranged version specifiers (`^`, `~`, `*`, `>`, `<`) without a matching `package-lock.json` entry resolve to `"latest"` rather than the actual installed version. +- **Maven**: Managed-only deps (present in `` but not in ``) are not emitted, to avoid duplicating entries already declared in a BOM consumer. +- **dotnet**: Bracketed version ranges (e.g., `[1.0,2.0)`) become `"latest"`. +- **sbt**: Version variables using object member access (e.g., `Versions.log4j`) are not resolved — only simple `val`/`lazy val` string assignments are captured. +- **All parsers**: Direct dependencies only — transitive dependencies are not resolved or scanned. + +## External Integrations + +- **AST-CLI** ([Checkmarx/ast-cli](https://github.com/Checkmarx/ast-cli)) — primary consumer. Imports this module as a Go library. The fields `Locations`, `PackageManager`, `PackageName`, and `Version` on the `Package` struct are load-bearing: AST-CLI uses them to annotate scan results and drive remediation UI. Note: AST-CLI maps `"gradle"` and `"sbt"` to `"mvn"` when sending to the Checkmarx scanner API, since both build tools use Maven Central as their registry. +- **Checkmarx One SCA** — downstream scan engine that receives the parsed dependency list. + +## Deployment + +N/A — this is a Go library consumed via `go get github.com/Checkmarx/manifest-parser`. It is not deployed as a service. The CLI (`cmd/`) is a local testing convenience, not a production artifact. + +## Performance Considerations + +- Maven re-scans the raw XML bytes after `encoding/xml` parsing (two passes). Large `pom.xml` files are loaded fully into memory; there is no streaming. +- Gradle version catalog parsing reads `libs.versions.toml` once, separately from the build file. Large catalogs (80+ entries) are fine; pathologically large files are not size-bounded. +- pypi parser preprocesses all lines first to join continuations before parsing — the full file is held in memory. +- No caching between calls to `ParsersFactory` — each invocation allocates fresh parser state. + +## Security & Access + +- Parsers consume **untrusted manifest files** (user-supplied input): + - `encoding/xml` does **not** resolve external entities or DTDs by default — XXE is not a risk with the standard library decoder. + - There is no file-size limit enforced before reading. Callers in adversarial environments should validate file size before calling `Parse`. + - Path traversal: `ParsersFactory` accepts an arbitrary file path; callers are responsible for sanitising paths before passing them in. +- No credentials, secrets, or network calls inside any parser. + +## Logging + +- The **library** (`pkg/`, `internal/`) returns `error` values and does not log. Callers should not expect any log output from the library. +- The **CLI** (`cmd/main.go`) uses `log.Fatalf` on parse/marshal errors and exits non-zero. Normal output is JSON printed to stdout. +- Exception: `setuptools/` parsers use `log.Printf` for debug/warning output during development. This should be treated as temporary and removed before production release. + +## Coding Standards + +- `gofmt` and `go vet` clean — CI will fail otherwise. +- Exported identifiers live in `pkg/`; internal logic lives in `internal/`. Do not add exported symbols to `internal/`. +- Parser packages follow the naming layout: `internal/parsers//_parser.go` + `_parser_test.go`. +- No global state in parsers — each concrete parser type is a stateless zero-value struct. +- When splitting file content into lines, always strip `\r` to handle CRLF files on Windows: `strings.TrimRight(line, "\r")`. Failing to do so causes `len(line)` to return one extra byte, producing off-by-one `EndIndex` values. + +## Debugging Steps + +1. **Run one parser against a fixture:** + ```bash + go run ./cmd test/resources/pom.xml + go run ./cmd test/resources/build.gradle + go run ./cmd internal/testdata/build.sbt + go run ./cmd internal/testdata/pyproject.toml + go run ./cmd internal/testdata/setup.cfg + ``` + +2. **Verbose test output to see which test case fails:** + ```bash + go test -v ./internal/parsers/maven/... + go test -v ./internal/parsers/sbt/... + go test -v ./internal/parsers/poetry/... + ``` + +3. **Location off-by-one:** Parser violated 0-based contract. Grep for `i + 1` patterns near `Line:` / `LineNum:` assignments — emit `Line: i`, not `i + 1`. + +4. **EndIndex off-by-one on Windows:** File has CRLF line endings and the parser uses `len(line)` without stripping `\r`. Fix: add `strings.TrimRight(line, "\r")` after `strings.Split(content, "\n")`. + +5. **Version resolves to `"latest"` unexpectedly:** check whether the version string matches a range specifier (`^`, `~`, `[`, `*`) or whether a lock file / properties file is present in the same directory as the fixture. + +6. **New ecosystem not dispatched:** verify `selectManifestFile` in `manifest-file-selector.go` handles the new filename/extension and that the factory `switch` has a corresponding case. diff --git a/GRADLE_PARSER_IMPLEMENTATION_PLAN.md b/GRADLE_PARSER_IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..94dda14 --- /dev/null +++ b/GRADLE_PARSER_IMPLEMENTATION_PLAN.md @@ -0,0 +1,110 @@ +# Gradle Parser Implementation Plan + +## Overview +This document describes the implementation plan and execution steps for adding Gradle manifest parsing support to the `manifest-parser` repository. + +The parser was extended to support static Gradle dependency declarations in both Groovy and Kotlin DSL, including common production patterns such as multi-line dependencies and conditional `if` blocks. + +--- + +## Implementation Plan + +### 1. Analyze existing code flow +- Inspect `cmd/main.go` to understand entrypoint behavior. +- Review `pkg/parser/parser.go` and `pkg/parser/parser_factory.go` to understand the parser interface and factory logic. +- Review `pkg/parser/manifest-file-selector.go` to see how manifest types are detected. +- Review existing language parser implementations for style and output format. + +### 2. Add Gradle manifest detection +- Extend `pkg/parser/manifest-file-selector.go` to recognize `build.gradle` and `build.gradle.kts` files. +- Add a new `Manifest` type for Gradle. + +### 3. Add factory support for Gradle +- Update `pkg/parser/parser_factory.go` to import the new Gradle parser. +- Return the Gradle parser instance when the selected manifest is Gradle. + +### 4. Implement Gradle parser +- Create `internal/parsers/gradle/gradle_parser.go`. +- Implement the `Parser` interface for Gradle. +- Parse dependencies into `models.Package` entries. + +### 5. Add variable resolution +- Read `gradle.properties` values. +- Parse Groovy `ext {}` blocks and `ext.key` assignments. +- Parse Kotlin DSL `val` and `const val` declarations. +- Resolve `${var}` and `$var` references in dependency strings. + +### 6. Add support for multi-line dependency declarations +- Detect dependency statements spanning multiple lines. +- Join logical dependency lines before parsing. +- Support both string notation and map notation across line breaks. + +### 7. Add conditional dependency support +- Parse dependencies inside conditional blocks such as `if (...) { ... }`. +- Treat static declarations inside conditionals as valid parse targets. + +### 8. Add Kotlin DSL support +- Support Kotlin string syntax: `implementation("group:name:version")`. +- Support Kotlin map syntax: `implementation(group = "group", name = "name", version = "version")`. +- Support Kotlin-style dependency declarations in `build.gradle.kts`. + +### 9. Write regression tests +- Create or update `internal/parsers/gradle/gradle_parser_test.go`. +- Add tests for: + - basic Groovy dependencies + - multi-line dependencies + - conditional `if` block dependencies + - Kotlin DSL dependency syntax + +### 10. Validate +- Run `go test ./internal/parsers/gradle`. +- Confirm Gradle parser tests pass. +- Optionally run `go test ./...` to verify broader repository compatibility, noting existing unrelated test failures. + +--- + +## Files created or modified + +- `pkg/parser/manifest-file-selector.go` +- `pkg/parser/parser_factory.go` +- `internal/parsers/gradle/gradle_parser.go` +- `internal/parsers/gradle/gradle_parser_test.go` +- `test/resources/build.gradle` (sample Gradle fixture) + +--- + +## Supported Gradle parser features + +- Detection of `build.gradle` and `build.gradle.kts` files +- Parsing of common dependency configurations: + - `implementation`, `api`, `compile`, `compileOnly`, `runtime`, `runtimeOnly` + - `testImplementation`, `testCompile`, `testRuntimeOnly` + - `androidTestImplementation`, `annotationProcessor`, `classpath`, `kapt` +- String-style dependency declarations +- Map-style dependency declarations +- Multi-line dependency statements +- Dependencies inside `if (...) { ... }` blocks +- Variable resolution from: + - `gradle.properties` + - Groovy `ext` property blocks + - Groovy `ext.key = value` syntax + - Kotlin DSL `val` / `const val` +- Kotlin DSL dependency syntax +- Version cleanup for simple ranges and classifiers + +--- + +## Known limitations + +- Dynamic dependencies generated by build logic, loops, or plugin APIs are not resolved. +- Complex Kotlin DSL constructs beyond common forms may not be fully parsed. +- Conditional branch logic is not evaluated; all static declarations are treated as present. +- Deep nested DSL or custom Gradle extension syntax may be missed. +- Computed or function-based version expressions are not evaluated. +- Multi-project and included-build dependency resolution is not supported. + +--- + +## Notes + +The Gradle parser is now suitable for many production scanning scenarios in AST-CLI where static dependency declarations are present. For full Gradle model accuracy, additional Gradle-aware parsing or integration with Gradle tooling would be required. diff --git a/README.md b/README.md index 651cb66..6cf3ab3 100644 --- a/README.md +++ b/README.md @@ -1,135 +1,892 @@ -# manifest-parser +# Manifest Parser -A Go module for parsing package manifest files from multiple package managers. It extracts package names, versions, and their exact source locations (line and character offsets) from dependency declarations. +A production-grade Go library for parsing dependency manifests across multiple package managers. Extracts package dependencies from build files and dependency declarations in a standardized format for security scanning, SBOM generation, and dependency analysis. -This module is consumed by the [AST-CLI](https://github.com/Checkmarx/ast-cli) to identify declared dependencies and power Checkmarx runtime scans. +## 🎯 Purpose -## Supported Manifests +This parser extracts software dependencies from project manifest files and provides: +- **Standardized Package Output** - Consistent JSON format across all package managers +- **Version Tracking** - Precise version information for vulnerability scanning +- **Location Tracking** - File path and line numbers for each dependency +- **Security Scanning** - Integration with SCA (Software Composition Analysis) tools +- **SBOM Generation** - Software Bill of Materials (cyclonedx, spdx) support -| Ecosystem | File(s) | -|------------|------------------------------------------------------------| -| Maven | `pom.xml` | -| npm | `package.json` | -| Python | `requirements*.txt`, `packages*.txt` | -| Go | `go.mod` | -| .NET | `*.csproj`, `Directory.Packages.props`, `packages.config` | +## 📦 Supported Package Managers -## Installation +| Manager | Format | Status | Features | +|---------|--------|--------|----------| +| **Gradle** | `build.gradle`, `build.gradle.kts`, `libs.versions.toml` | ✅ Production | Latest DSL + catalogs + direct TOML parsing | +| **Maven** | `pom.xml` | ✅ Production | Properties, BOMs, ranges | +| **npm/Node.js** | `package.json` | ✅ Production | Dependencies, dev, peer, optional | +| **Go** | `go.mod` | ✅ Production | Direct imports, indirect | +| **.NET** | `.csproj`, `Directory.Packages.props`, `packages.config` | ✅ Production | Multi-format support | +| **Python** | `requirements.txt` | ✅ Production | Pip format with ranges | + +--- + +## 🚀 Quick Start + +### Installation ```bash go get github.com/Checkmarx/manifest-parser ``` -## Usage - -The entry point is the `ParsersFactory`, which selects the correct parser based on the manifest file name/extension. +### Usage ```go package main import ( - "encoding/json" "fmt" - "log" - "github.com/Checkmarx/manifest-parser/pkg/parser" ) func main() { - manifestFile := "path/to/pom.xml" - - p := parser.ParsersFactory(manifestFile) + // Create parser for manifest file + p := parser.ParsersFactory("path/to/package.json") if p == nil { - log.Fatalf("Unsupported manifest type: %s", manifestFile) + fmt.Println("Unsupported manifest type") + return } - packages, err := p.Parse(manifestFile) + // Parse dependencies + packages, err := p.Parse("path/to/package.json") if err != nil { - log.Fatalf("Error parsing manifest: %v", err) + fmt.Println("Error:", err) + return + } + + // Process results + for _, pkg := range packages { + fmt.Printf("%s:%s@%s\n", pkg.PackageManager, pkg.PackageName, pkg.Version) + } +} +``` + +### Command Line + +```bash +# Parse any supported manifest +go run cmd/main.go path/to/manifest + +# Examples +go run cmd/main.go project/pom.xml +go run cmd/main.go project/package.json +go run cmd/main.go project/build.gradle +go run cmd/main.go project/go.mod +``` + +--- + +## 📋 Detailed Parser Documentation + +### 1. Gradle Parser + +**Files:** `build.gradle`, `build.gradle.kts`, `gradle/libs.versions.toml` + +#### Features + +✅ **Groovy DSL** - Traditional Android/Java Gradle syntax +✅ **Kotlin DSL** - Modern type-safe Gradle syntax +✅ **gradle.properties** - Centralized property management +✅ **Version Catalog** - `gradle/libs.versions.toml` (Gradle 7.0+) +✅ **BOM/Platform** - Dependency Bill of Materials imports +✅ **Multi-Module** - Subproject and module-specific configurations +✅ **19 Configurations** - implementation, api, testImplementation, debugImplementation, ksp, etc. + +#### Dependency Declaration Support + +```gradle +// String notation +implementation 'org.springframework:spring-core:5.3.20' + +// Kotlin DSL +implementation("org.springframework:spring-core:5.3.20") + +// Map notation +implementation group: 'org.springframework', name: 'spring-core', version: '5.3.20' + +// Platform/BOM +implementation platform('org.springframework.boot:spring-boot-dependencies:2.7.0') + +// Version Catalog +implementation(libs.spring.core) +``` + +#### Variable Resolution + +```gradle +// gradle.properties +springVersion=5.3.20 + +// build.gradle +implementation "org.springframework:spring-core:${springVersion}" + +// ext blocks +ext { + log4jVersion = '2.17.1' +} +dependencies { + implementation "org.apache.logging.log4j:log4j-core:$log4jVersion" +} +``` + +#### Supported Configurations + +| Type | Purpose | +|------|---------| +| `implementation` | Runtime + compile dependencies | +| `api` | Public API (exported to consumers) | +| `compileOnly` | Compile-time only (e.g., annotations) | +| `runtimeOnly` | Runtime-only (excluded from compile) | +| `testImplementation` | Test-only dependencies | +| `debugImplementation` | Debug build variant | +| `releaseImplementation` | Release build variant | +| `annotationProcessor` | Annotation code generation | +| `ksp` / `kapt` | Kotlin/Java code generation | +| `classpath` | Buildscript dependencies | +| Plus 9 more variants for testing, fixtures, lint checks | + +#### Example: Multi-Module Project + +```kotlin +// build.gradle.kts +subprojects { + apply(plugin = "java") + + dependencies { + implementation("org.springframework.boot:spring-boot-starter-web") + } +} + +project(":api-module") { + dependencies { + implementation(project(":core")) + implementation("org.springframework.security:spring-security-core:5.7.1") } +} +``` + +#### Version Catalog Support + +**Direct Parsing:** You can now parse `libs.versions.toml` directly! + +```bash +# Parse version catalog directly +go run cmd/main.go gradle/libs.versions.toml +``` - out, _ := json.MarshalIndent(packages, "", " ") - fmt.Println(string(out)) +**Catalog Format:** + +```toml +# gradle/libs.versions.toml +[versions] +spring-version = "5.3.20" + +[libraries] +spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" } + +[bundles] +spring = ["spring-core", "spring-context"] +``` + +**Automatic Discovery:** When parsing `build.gradle` or `build.gradle.kts`, the parser automatically discovers and parses `gradle/libs.versions.toml` in the same directory. + +#### Parser Capabilities + +**Build File Parsing:** +- ✅ Parses Groovy and Kotlin DSL +- ✅ Resolves variables from gradle.properties +- ✅ Discovers and parses version catalogs +- ✅ Unwraps platform()/enforcedPlatform() BOMs +- ✅ Walks up directory tree for parent properties +- ✅ Filters out project references (multi-module) +- ✅ Skips file references (local JARs) +- ✅ Handles multi-line declarations +- ✅ Parses conditional if blocks + +**Version Catalog Parsing:** +- ✅ Direct parsing of `libs.versions.toml` files +- ✅ Extracts all 80+ library definitions +- ✅ Resolves version references +- ✅ Supports all catalog formats (simple, module, key-value) +- ✅ Works standalone or auto-discovered by build files + +**General:** +- ❌ Does not evaluate dynamic Gradle code + +#### Test Resources + +``` +test/resources/ +├── build.gradle - Groovy DSL with subprojects +├── build.gradle.kts - Kotlin DSL with 5 modules +├── gradle.properties - Centralized properties +└── gradle/libs.versions.toml - 80+ catalog entries +``` + +**Test Coverage:** 16 passing tests including platform dependencies, version catalogs, extended configurations, parent property inheritance + +--- + +### 2. Maven Parser + +**File:** `pom.xml` + +#### Features + +✅ **Dependency Management** - BOM imports and managed versions +✅ **Multi-Module** - Parent/child POM relationships +✅ **Properties** - Variable substitution with `${property}` +✅ **Version Ranges** - `[1.0,2.0)` notation handling +✅ **Scopes** - compile, runtime, test, provided, optional, system +✅ **Location Tracking** - Exact line numbers in POM files + +#### Dependency Declaration Support + +```xml + + + org.springframework + spring-core + 5.3.20 + + + + + junit + junit + 4.13.2 + test + + + + + org.springframework + spring-core + ${spring.version} + + + + + com.example + library + [1.0,2.0) + + + + + + + org.springframework.boot + spring-boot-dependencies + 2.7.0 + pom + import + + + +``` + +#### Property Resolution + +```xml + + 5.3.20 + + + +${spring.version} +``` + +#### Dependency Scopes + +| Scope | Purpose | +|-------|---------| +| `compile` | Runtime + compile (default) | +| `test` | Test-only dependencies | +| `runtime` | Runtime-only | +| `provided` | Compile-only, provided at runtime | +| `optional` | Included optionally | +| `system` | Local filesystem JAR | + +#### Parser Capabilities + +- ✅ Parses POM XML structure +- ✅ Resolves properties and version ranges +- ✅ Handles BOM imports and managed dependencies +- ✅ Tracks multi-line elements +- ✅ Extracts scope information +- ✅ Locates exact line numbers +- ✅ Supports parent POM references + +#### Example: Multi-Module Project + +```xml + +com.example +parent +1.0.0 +pom + + + core + api + + + + + com.example + parent + 1.0.0 + + +core + + + + org.springframework + spring-core + ${spring.version} + + +``` + +--- + +### 3. NPM/Node.js Parser + +**File:** `package.json` + +#### Features + +✅ **Dependency Types** - dependencies, devDependencies, peerDependencies, optionalDependencies +✅ **Version Resolution** - Resolves ranges using package-lock.json +✅ **Exact Versions** - Extracts actual installed versions from lock files +✅ **Range Handling** - `^1.0.0`, `~1.0.0`, `*`, ranges + +#### Dependency Declaration Support + +```json +{ + "dependencies": { + "express": "4.18.2", + "lodash": "^4.17.21" + }, + "devDependencies": { + "jest": "~29.0.0", + "webpack": "*" + }, + "peerDependencies": { + "react": "^18.0.0" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } +} +``` + +#### Version Specifiers + +| Format | Meaning | +|--------|---------| +| `1.2.3` | Exact version | +| `^1.2.3` | Compatible with 1.2.3 (up to 2.0.0) | +| `~1.2.3` | Approximately 1.2.3 (up to 1.3.0) | +| `>=1.2.3` | Greater than or equal | +| `1.2.x` | Patch-level ranges | +| `*` | Any version | + +#### Dependency Types + +| Type | Purpose | +|------|---------| +| `dependencies` | Production dependencies | +| `devDependencies` | Development-only (testing, bundling) | +| `peerDependencies` | Consumer-provided dependencies | +| `optionalDependencies` | Optional packages | + +#### Parser Capabilities + +- ✅ Parses package.json JSON +- ✅ Resolves version ranges using package-lock.json +- ✅ Extracts all 4 dependency types +- ✅ Handles multiple version specifiers +- ✅ Provides exact installed versions + +#### Example: Large Project + +```json +{ + "name": "my-app", + "version": "1.0.0", + "dependencies": { + "react": "18.2.0", + "react-dom": "18.2.0", + "axios": "^1.4.0" + }, + "devDependencies": { + "@babel/core": "^7.22.0", + "webpack": "^5.88.0", + "jest": "~29.0.0" + } } ``` -### Package Model +--- + +### 4. Go Modules Parser + +**File:** `go.mod` + +#### Features + +✅ **Module Dependencies** - Direct and indirect imports +✅ **Version Pinning** - Exact semver versions +✅ **Replace Directives** - Local and remote replacements +✅ **Exclude Directives** - Version exclusions +✅ **Go Version** - Minimum Go version requirement + +#### Dependency Declaration Support + +```go +module github.com/example/project + +go 1.19 + +require ( + github.com/gorilla/mux v1.8.0 + github.com/google/uuid v1.3.0 +) + +require ( + github.com/stretchr/testify v1.8.4 // indirect +) + +replace ( + github.com/old/module => github.com/new/module v1.2.3 + github.com/local/module => ./local/path +) + +exclude ( + github.com/bad/module v1.0.0 +) +``` + +#### Dependency Status + +| Type | Purpose | +|------|---------| +| `require` | Direct dependencies | +| `require (indirect)` | Transitive dependencies | +| `replace` | Local/remote replacements | +| `exclude` | Excluded versions | + +#### Parser Capabilities + +- ✅ Parses go.mod file format +- ✅ Extracts direct and indirect imports +- ✅ Handles replace and exclude directives +- ✅ Tracks minimum Go version +- ✅ Provides exact line numbers -Each parser returns a slice of `models.Package`: +#### Example: Complex Project + +```go +module github.com/checkmarx/scanner + +go 1.20 + +require ( + github.com/spf13/cobra v1.7.0 + github.com/sirupsen/logrus v1.9.3 +) + +require ( + github.com/inconshreveable/log15 v2.3.2 // indirect + golang.org/x/sys v0.10.0 // indirect +) + +replace github.com/local/package => ../local/package + +exclude golang.org/x/text v0.3.0 +``` + +--- + +### 5. .NET / C# Parser + +**Files:** `.csproj`, `Directory.Packages.props`, `packages.config` + +#### Features + +✅ **Project References** - `.csproj` PackageReference elements +✅ **Centralized Management** - `Directory.Packages.props` for monorepos +✅ **Legacy Format** - `packages.config` (NuGet v2) +✅ **Target Frameworks** - Framework-specific dependencies +✅ **Metadata** - Version, Include, Exclude attributes + +#### Dependency Declaration Support + +##### `.csproj` Format (Modern) + +```xml + + + + + +``` + +##### `Directory.Packages.props` (Centralized) + +```xml + + true + + + + + + +``` + +##### `packages.config` (Legacy NuGet) + +```xml + + + + + +``` + +#### Package Metadata + +| Attribute | Purpose | +|-----------|---------| +| `Include` / `id` | Package name | +| `Version` | Semantic version | +| `TargetFramework` | Framework specificity | +| `Condition` | Conditional inclusion | +| `Exclude` | Excluded frameworks | + +#### Parser Capabilities + +- ✅ Parses `.csproj` XML structure +- ✅ Extracts `Directory.Packages.props` central versions +- ✅ Handles legacy `packages.config` format +- ✅ Respects framework-specific conditions +- ✅ Tracks line numbers and locations + +#### Example: Multi-Framework Project + +```xml + + + + net6.0;net8.0;net472 + + + + + + + +``` + +--- + +### 6. Python / Pip Parser + +**File:** `requirements.txt` + +#### Features + +✅ **Pip Format** - Standard Python dependency format +✅ **Version Specifiers** - `==`, `>=`, `<=`, `~=`, ranges +✅ **Comments & Empty Lines** - Properly ignored +✅ **Environment Markers** - OS/Python version conditions +✅ **Git References** - VCS dependencies + +#### Dependency Declaration Support + +```txt +# Production dependencies +Django==4.2.0 +djangorestframework>=3.14.0,<4.0 +requests~=2.31.0 + +# Dev dependencies +pytest>=7.0.0 +black==23.0.0 + +# Git references +git+https://github.com/example/repo.git@main#egg=mypackage + +# With environment markers +pywin32>=300; sys_platform == 'win32' +``` + +#### Version Specifiers + +| Specifier | Meaning | +|-----------|---------| +| `==1.4.2` | Exact version | +| `>=1.4.2` | Greater than or equal | +| `<=1.4.2` | Less than or equal | +| `!=1.4.2` | Not equal | +| `~=1.4.2` | Compatible release (1.4.x) | +| `*` | Any version | + +#### Environment Markers + +```txt +# Platform-specific +pywin32>=300; sys_platform == 'win32' + +# Python version specific +dataclasses; python_version < '3.7' + +# Complex conditions +numpy>=1.20; python_version >= '3.8' and sys_platform != 'win32' +``` + +#### Parser Capabilities + +- ✅ Parses pip requirements format +- ✅ Extracts package names and versions +- ✅ Handles version specifier ranges +- ✅ Recognizes environment markers +- ✅ Ignores comments and blank lines + +#### Example: Complete Project + +```txt +# Python 3.8+ +Python>=3.8 + +# Web Framework +Flask==2.3.0 +Flask-SQLAlchemy>=3.0.0,<4.0 + +# Database +psycopg2-binary~=2.9.0 +SQLAlchemy>=2.0.0 + +# Testing +pytest>=7.0.0 +pytest-cov>=4.0.0 + +# Development +black==23.0.0 +flake8>=6.0.0 + +# OS-specific +pywin32>=300; sys_platform == 'win32' +``` + +--- + +## 📊 Output Format + +All parsers return a standardized `Package` structure: ```go type Package struct { - PackageManager string - PackageName string - Version string - FilePath string - Locations []Location + PackageManager string // "gradle", "maven", "npm", "go", "dotnet", "pip" + PackageName string // "group:name" or "name" + Version string // "1.2.3" + FilePath string // Path to manifest file + Locations []Location // Line numbers } type Location struct { - Line int - StartIndex int - EndIndex int + Line int // Line number (1-indexed) + StartIndex int // Character offset + EndIndex int // Character offset } ``` -`Locations` points to the exact position of the dependency declaration in the source manifest, which downstream tools use for inline annotations and remediation. +### JSON Output Example + +```json +[ + { + "packageManager": "gradle", + "packageName": "org.springframework:spring-core", + "version": "5.3.20", + "filePath": "build.gradle", + "locations": [ + { + "line": 42, + "startIndex": 0, + "endIndex": 0 + } + ] + }, + { + "packageManager": "maven", + "packageName": "com.google.guava:guava", + "version": "31.1-jre", + "filePath": "pom.xml", + "locations": [ + { + "line": 127, + "startIndex": 0, + "endIndex": 0 + } + ] + } +] +``` -## CLI +--- -A small CLI is provided under [cmd/main.go](cmd/main.go) for local testing: +## 🔒 Security & Vulnerability Detection -```bash -go run ./cmd -``` +This parser is designed to support security scanning and SCA (Software Composition Analysis) tools: -Example: +### Integration with Vulnerability Databases -```bash -go run ./cmd test/fixtures/pom.xml +``` +Dependency Extraction → Vulnerability Database → Risk Assessment + (NVD CVE) + (GitHub Advisory) + (Snyk Database) + (Sonatype OSS) ``` -## Project Layout +### Example: Detecting Log4j RCE -``` -cmd/ # CLI entry point -pkg/parser/ # Public API: Parser interface, factory, models -internal/parsers/ # Per-ecosystem parser implementations - ├── dotnet/ - ├── golang/ - ├── maven/ - ├── npm/ - └── pypi/ -test/ # Integration tests and fixtures +```gradle +dependencies { + implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // CVE-2021-44228 +} ``` -## Integration with AST-CLI +Parser extracts → `org.apache.logging.log4j:log4j-core:2.14.0` +↓ +Vulnerability checker matches → CVE-2021-44228 (CRITICAL - Log4Shell RCE) -The [AST-CLI](https://github.com/Checkmarx/ast-cli) imports this module to discover declared dependencies from a scanned repository, feeding them into Checkmarx runtime scanning to correlate manifest declarations with runtime package usage. +--- -## Known Limitations +## 🏗️ Architecture -The following limitations apply when this parser is used as part of the Checkmarx One Developer Assist realtime OSS scanner (see the [official docs](https://docs.checkmarx.com/en/34965-405960-checkmarx-one-developer-assist.html)): +``` +Parser Interface (parser.go) + ↓ +Manifest Detection (manifest-file-selector.go) + ↓ +Parser Factory (parser_factory.go) + ↓ +Language-Specific Parsers + ├─ Gradle Parser (gradle/gradle_parser.go, gradle/version_catalog.go) + ├─ Maven Parser (maven/maven-pom-parser.go) + ├─ npm Parser (npm/package_json_parser.go) + ├─ Go Parser (golang/go-mod-parser.go) + ├─ .NET Parsers (dotnet/csproj_parser.go, etc.) + └─ Python Parser (pypi/pypi-parser.go) + ↓ +Standardized Package Output (models/package_model.go) +``` -- **Direct dependencies only** — vulnerabilities are identified only in packages declared directly in the manifest. Transitive dependencies are not resolved or scanned. -- **Version specifiers are not evaluated** — package managers commonly allow range/wildcard specifiers (e.g., `^`, `~`, `*`, etc.). The scanner does not resolve these; when encountered, it falls back to analyzing the *latest* version of the package. -- **Python `requirements.txt` format** — only traditional, manually authored files using the `package==version` format are supported. Auto-generated files (e.g., produced by `pip freeze`, `pip-tools`, `Poetry`) are not supported. -- **Scope vs. full SCA** — the realtime OSS scanner is intentionally lighter than the full Checkmarx One SCA scanner and is therefore less comprehensive. +--- -## Development +## 🧪 Testing -Run the test suite: +Run tests for all parsers: ```bash +# Run all tests go test ./... + +# Run specific parser tests +go test ./internal/parsers/gradle/ -v +go test ./internal/parsers/maven/ -v +go test ./internal/parsers/npm/ -v + +# With coverage +go test ./... -cover ``` -Build the CLI: +### Test Resources -```bash -go build -o manifest-parser ./cmd ``` +test/resources/ +├── build.gradle (Gradle DSL) +├── build.gradle.kts (Kotlin DSL) +├── pom.xml (Maven) +├── package.json (npm) +├── test_go.mod (Go Modules) +├── Bootstrap.csproj (.NET Framework) +├── Directory.Packages.props (.NET Centralized) +├── packages.config (.NET Legacy) +└── requirements.txt (Python) +``` + +--- + +## 📚 Documentation + +- [Gradle Parser Details](test/resources/GRADLE_TEST_FILES_README.md) - Comprehensive Gradle documentation with 31 vulnerable dependencies for testing +- [Maven Documentation](https://maven.apache.org/pom.html) +- [npm Documentation](https://docs.npmjs.com/cli/v10/configuring-npm/package-json) +- [Go Modules Documentation](https://go.dev/ref/mod) +- [NuGet Documentation](https://learn.microsoft.com/en-us/nuget/) +- [Pip Documentation](https://pip.pypa.io/) + +--- + +## 🤝 Contributing + +Contributions welcome! Focus areas: + +- [ ] Add Ruby Bundler support (Gemfile) +- [ ] Add PHP Composer support (composer.json) +- [ ] Add Rust Cargo support (Cargo.toml) +- [ ] Improve version range resolution +- [ ] Add more vulnerability test cases +- [ ] Performance optimizations + +--- + +## ⚖️ License + +This project is part of the Checkmarx AST (Application Security Testing) suite. + +--- + +## 🚀 Features Summary + +| Feature | Gradle | Maven | npm | Go | .NET | Python | +|---------|--------|-------|-----|----|----|--------| +| Multi-file format | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Property resolution | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| Version ranges | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| BOM imports | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| Multi-module | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | +| Line numbers | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Comments/ignored | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Scope separation | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | + +--- + +## 📝 Version History + +- **v3.0.0** - Added Gradle version catalog support, enhanced property resolution +- **v2.5.0** - Added .NET Directory.Packages.props support +- **v2.0.0** - Initial multi-parser support + +--- + +## 📧 Contact & Support + +For issues, questions, or feature requests: +- GitHub Issues: [manifest-parser/issues](https://github.com/Checkmarx/manifest-parser/issues) +- Security: [security@checkmarx.com](mailto:security@checkmarx.com) -## License +--- -See repository for license details. +**Made with ❤️ for secure software supply chain management** diff --git a/docs/sbt-parser-implementation-plan.md b/docs/sbt-parser-implementation-plan.md new file mode 100644 index 0000000..c46beed --- /dev/null +++ b/docs/sbt-parser-implementation-plan.md @@ -0,0 +1,189 @@ +# SBT Parser Implementation Plan + +## Context + +The manifest-parser repository supports Maven, npm, PyPI, Go modules, and .NET. The user needs to extend it with SBT (Scala Build Tool) support to parse SBT manifest files and extract dependencies. The implementation must follow existing patterns exactly, add duplicate detection, include comprehensive tests with vulnerable packages, and integrate cleanly without modifying existing parsers. + +### Supported SBT File Types + +SBT uses multiple file types that can declare dependencies. The parser supports **all `.sbt` files** via extension-based matching (like `.csproj` for dotnet): + +| File | Purpose | Syntax | +|------|---------|--------| +| `build.sbt` | Primary build definition | `libraryDependencies += "g" % "a" % "v"` | +| `plugins.sbt` | SBT plugin dependencies (in `project/`) | `addSbtPlugin("g" % "a" % "v")` | +| `dependencies.sbt` | Separated dependency definitions | Same as `build.sbt` | +| Any other `*.sbt` | SBT auto-loads all `.sbt` files in project root | Same as `build.sbt` | + +The core dependency regex `"g" % "a" % "v"` matches inside any wrapper (`addSbtPlugin(...)`, `libraryDependencies +=`, bare declarations), so all these file types are handled by the same parser with no special-casing needed. + +--- + +## Files to Create (3) + +### 1. `internal/parsers/sbt/sbt-parser.go` — Core Parser + +**Package:** `sbt` | **Struct:** `SbtParser{}` | **PackageManager string:** `"sbt"` + +**Parsing Strategy — Two-pass, regex-based (like PyPI parser but with Seq-block state tracking):** + +- **Pass 1:** Extract variable definitions into `map[string]string` +- **Pass 2:** Line-by-line dependency extraction with state machine for `Seq(...)` blocks + +#### Variable Extraction (Pass 1) + +Supports all Scala variable declaration forms used in SBT files: + +| Pattern | Example | Regex | +|---------|---------|-------| +| `val` | `val v = "1.0"` | `^\s*val\s+(\w+)\s*=\s*"([^"]+)"` | +| `lazy val` | `lazy val v = "1.0"` | `^\s*lazy\s+val\s+(\w+)\s*=\s*"([^"]+)"` | +| `def` | `def v = "1.0"` | `^\s*def\s+(\w+)\s*=\s*"([^"]+)"` | + +All three patterns are combined into a single regex: +``` +^\s*(?:lazy\s+)?(?:val|def)\s+(\w+)\s*=\s*"([^"]+)" +``` + +#### Dependency Extraction (Pass 2) + +**Core dependency regex:** +``` +"([^"]+)"\s+(%{1,3})\s+"([^"]+)"\s+%\s+(?:"([^"]+)"|(\w+))(?:\s+%\s+(?:"[^"]*"|\w+))? +``` +Captures: groupId, operator (`%`/`%%`/`%%%`), artifactId, version (quoted or variable name), optional scope (ignored). + +#### Helper functions: +- `extractVariables(lines []string) map[string]string` — supports `val`, `lazy val`, and `def` +- `resolveVersion(version string, vars map[string]string) string` — exact version as-is, variable lookup, unresolvable → `"latest"` +- `stripComments(line string, inBlockComment *bool) string` — handles `//` and `/* */` +- `computeLocationIndices(rawLine, groupId) (int, int)` — calculates start/end with modifier-aware trimming + +#### Duplicate detection: +`map[string]bool` keyed by `"groupId:artifactId"`. Skip duplicates silently (no `log.Printf` — this is a library, not a CLI; callers control their own logging). + +#### Comment handling: +Strip `//` inline comments; track `/* */` block comment state across lines. The `//` stripping is applied **after** the dependency regex match on the raw line, so `//` inside quoted strings in dependency declarations won't cause false truncation. + +#### Location tracking: +Single `Location` per package (like PyPI), `Line` is 0-indexed: +- `StartIndex` = position of first `"` of groupId in the raw line +- `EndIndex` = end of the dependency declaration, **excluding** trailing modifiers + +**Modifier-aware EndIndex calculation:** The `computeLocationIndices` function trims the following patterns from the end of the line when computing `EndIndex`: +- Trailing commas and whitespace +- Dependency modifiers: `exclude(...)`, `excludeAll(...)`, `classifier(...)`, `intransitive()`, `withSources()`, `withJavadoc()`, `cross(...)` +- Closing parentheses from `addSbtPlugin(...)` or `Seq(...)` wrappers +- Inline comments (`// ...`) + +This ensures the location span covers only the `"g" % "a" % "v"` core declaration. + +#### Imports: +Only stdlib — `os`, `regexp`, `strings`, `fmt` + `models` package. No `log` import (library code should not write to stderr). + +### 2. `internal/testdata/build.sbt` and `internal/testdata/plugins.sbt` — Test Fixtures + +**`build.sbt`** — Contains known-vulnerable dependencies: +- **log4j-core 2.14.0** (CVE-2021-44228 — Log4Shell) +- **jackson-databind 2.13.0** (multiple CVEs) +- **struts2-core 2.5.20** (CVE-2020-17530) +- **commons-collections 3.2.1** (deserialization vulnerability) +- **snakeyaml 1.26** (CVE-2022-1471) + +Exercises all parsing scenarios: `%`, `%%`, `%%%`, `Seq(...)`, variable-based versions, inline comments, block comments, scope annotations. + +**`plugins.sbt`** — Contains SBT plugin dependencies using `addSbtPlugin(...)` syntax to validate that the parser handles `plugins.sbt` files correctly. + +### 3. `internal/parsers/sbt/sbt-parser_test.go` — Comprehensive Tests + +**Table-driven + individual tests following Maven/PyPI patterns:** + +| # | Test | What it validates | +|---|------|-------------------| +| 1 | TestParseSingleDependency | Basic `libraryDependencies += "g" % "a" % "v"` | +| 2 | TestParseSingleDependencyDoublePercent | `%%` operator → PackageName is `g:a` (no Scala suffix) | +| 3 | TestParseSingleDependencyTriplePercent | `%%%` operator (Scala.js) → same as `%%` | +| 4 | TestParseSeqBlock | `libraryDependencies ++= Seq(...)` with multiple deps | +| 5 | TestParseWithScope | Trailing `% "test"` or `% Test` → parsed correctly, scope ignored | +| 6 | TestParseWithVariableVersion | `val v = "1.0"` then `% v` → resolves to `"1.0"` | +| 7 | TestParseWithUnresolvableVariable | Missing variable → version is `"latest"` | +| 8 | TestParseSingleLineComment | `//` comments are skipped | +| 9 | TestParseBlockComment | `/* ... */` spanning lines → deps inside skipped | +| 10 | TestParseEmptyFile | Returns empty slice, no error | +| 11 | TestParseDuplicateDependencies | Same `g:a` twice → first wins, second skipped | +| 12 | TestParseLocationAccuracy | Verify exact Line, StartIndex, EndIndex values | +| 13 | TestParseNonExistentFile | Returns error | +| 14 | TestParseMixedOperators | Mix of `%` and `%%` in same Seq | +| 15 | TestResolveVersion | Table-driven: exact, variable, missing, empty | +| 16 | TestParseAddSbtPlugin | `addSbtPlugin("g" % "a" % "v")` syntax from `plugins.sbt` | +| 17 | TestParseLazyVal | `lazy val v = "1.0"` → variable extracted and resolved | +| 18 | TestParseDef | `def v = "1.0"` → variable extracted and resolved | +| 19 | TestParseWithExclude | `"g" % "a" % "v" exclude("x", "y")` → parsed, EndIndex excludes modifier | +| 20 | TestParseWithIntransitive | `"g" % "a" % "v" intransitive()` → parsed, EndIndex excludes modifier | +| 21 | TestParseWithCross | `"g" % "a" % "v" cross CrossVersion.full` → parsed, EndIndex excludes modifier | +| 22 | TestParseWithExcludeAll | `"g" % "a" % "v" excludeAll(...)` → parsed, EndIndex excludes modifier | +| 23 | TestParseDependencyOverrides | `dependencyOverrides += "g" % "a" % "v"` → parsed correctly | +| 24 | TestParseWithClassifier | `"g" % "a" % "v" % "test" classifier "tests"` → parsed, classifier ignored | +| 25 | TestExtractVariables | Table-driven: val, lazy val, def, commented out, indented | +| 26 | TestSbtParser_Parse_RealFile | Parse `../../testdata/build.sbt` and validate against expected packages | +| 27 | TestSbtParser_Parse_PluginsFile | Parse `../../testdata/plugins.sbt` and validate plugin dependencies | + +--- + +## Files to Modify (3) + +### 4. `pkg/parser/manifest-file-selector.go` + +- Add `SbtBuild` to the `Manifest` iota enum (after `GoMod`) +- Add extension-based detection: `if manifestFileExtension == ".sbt" { return SbtBuild }` + - This matches **all** `.sbt` files (`build.sbt`, `plugins.sbt`, `dependencies.sbt`, etc.) + - Follows the same pattern used for `.csproj` detection + +### 5. `pkg/parser/parser_factory.go` + +- Add import: `"github.com/Checkmarx/manifest-parser/internal/parsers/sbt"` +- Add case: `case SbtBuild: return &sbt.SbtParser{}` + +### 6. `pkg/parser/manifest-file-selector_test.go` + +- Add `TestManifestFileSelector_ExpectSbtBuild` test for `build.sbt` +- Add `TestManifestFileSelector_ExpectSbtPlugins` test for `plugins.sbt` +- Add `TestManifestFileSelector_ExpectSbtCustom` test for `dependencies.sbt` + +--- + +## Implementation Order + +1. Create `internal/parsers/sbt/sbt-parser.go` (core parser) +2. Create `internal/testdata/build.sbt` (test fixture) +3. Create `internal/parsers/sbt/sbt-parser_test.go` (tests) +4. Modify `pkg/parser/manifest-file-selector.go` (enum + detection) +5. Modify `pkg/parser/manifest-file-selector_test.go` (selector test) +6. Modify `pkg/parser/parser_factory.go` (factory registration) +7. Run `go test ./...` to verify all tests pass with no regressions + +## Verification + +1. `go build ./...` — compiles cleanly +2. `go test ./internal/parsers/sbt/ -v` — all SBT parser tests pass +3. `go test ./pkg/parser/ -v` — selector + factory tests pass (including new SBT test) +4. `go test ./... -v` — full suite, no regressions +5. `go test ./... -cover` — check coverage +6. `go run cmd/main.go internal/testdata/build.sbt` — produces correct JSON output +7. `go run cmd/main.go internal/testdata/plugins.sbt` — produces correct JSON output for plugin dependencies + +--- + +## Production-Readiness Hardening (v2) + +The following gaps were identified after initial implementation and are addressed in the updated parser: + +| # | Gap | Fix | Impact | +|---|-----|-----|--------| +| 1 | `lazy val` not matched | Extend varRegex to `(?:lazy\s+)?(?:val\|def)` | **High** — many real projects use `lazy val` | +| 2 | `def` declarations not matched | Same regex extension | **Medium** — some projects use `def` for versions | +| 3 | Modifiers corrupt EndIndex | `computeLocationIndices` trims `exclude(...)`, `intransitive()`, `withSources()`, `withJavadoc()`, `cross(...)`, `classifier(...)` | **Medium** — common in complex builds | +| 4 | Closing `)` from wrappers in EndIndex | Trim trailing `)` after modifiers | **Medium** — affects `addSbtPlugin(...)` | +| 5 | `log.Printf` in library code | Remove all `log.Printf` calls — library consumers control their own logging | **Medium** — breaks clean library usage | +| 6 | `dependencyOverrides` not tested | Already works (regex is context-free), add explicit test | **Low** — verification only | +| 7 | `classifier` keyword | Already handled by optional scope group in regex, add explicit test | **Low** — verification only | \ No newline at end of file diff --git a/internal/parsers/dotnet/csproj_parser.go b/internal/parsers/dotnet/csproj_parser.go index 8e6d342..0cf768e 100644 --- a/internal/parsers/dotnet/csproj_parser.go +++ b/internal/parsers/dotnet/csproj_parser.go @@ -116,9 +116,12 @@ func (p *DotnetCsprojParser) Parse(manifestFile string) ([]models.Package, error return nil, fmt.Errorf("failed to read manifest file: %w", err) } - // Split content into lines for index computation + // Split content into lines for index computation (strip \r for CRLF files) strContent := string(content) lines := strings.Split(strContent, "\n") + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } // Create XML decoder decoder := xml.NewDecoder(strings.NewReader(strContent)) diff --git a/internal/parsers/golang/go-mod-parser.go b/internal/parsers/golang/go-mod-parser.go index 0f50f84..6516359 100644 --- a/internal/parsers/golang/go-mod-parser.go +++ b/internal/parsers/golang/go-mod-parser.go @@ -25,8 +25,11 @@ func (p *GoModParser) Parse(manifest string) ([]models.Package, error) { return nil, err } - // Split file into lines for position calculation + // Split file into lines for position calculation (strip \r for CRLF files) lines := strings.Split(string(data), "\n") + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } var packages []models.Package for _, req := range mf.Require { diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go new file mode 100644 index 0000000..421c09f --- /dev/null +++ b/internal/parsers/gradle/gradle_parser.go @@ -0,0 +1,467 @@ +package gradle + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +// configKeywords defines all supported Gradle dependency configuration keywords +var configKeywords = `implementation|api|compile|compileOnly|runtime|runtimeOnly|` + + `testImplementation|testCompile|testCompileOnly|testRuntimeOnly|` + + `androidTestImplementation|debugImplementation|releaseImplementation|` + + `annotationProcessor|classpath|kapt|ksp|compileOnlyApi|` + + `testFixturesImplementation|testFixturesApi|lintChecks` + +// GradleParser implements parsing of Gradle build files +type GradleParser struct{} + +// Parse implements the Parser interface for Gradle build files +func (p *GradleParser) Parse(manifestFile string) ([]models.Package, error) { + content, err := os.ReadFile(manifestFile) + if err != nil { + return nil, fmt.Errorf("failed to read manifest file: %w", err) + } + + manifestContent := string(content) + + // Extract variables + variables := extractVariables(manifestFile, manifestContent) + + // Load version catalog if available + var catalog *VersionCatalog + if catalogPath := findVersionCatalog(manifestFile); catalogPath != "" { + catalog = parseVersionCatalog(catalogPath) + } + + var packages []models.Package + + // Parse main dependencies + mainDeps := parseDependencies(manifestContent, variables) + for i := range mainDeps { + mainDeps[i].FilePath = manifestFile + } + packages = append(packages, mainDeps...) + + // Parse version catalog dependencies (libs.xxx references) + if catalog != nil { + catalogDeps := parseVersionCatalogDependencies(manifestContent, catalog) + for i := range catalogDeps { + catalogDeps[i].FilePath = manifestFile + } + packages = append(packages, catalogDeps...) + } + + return packages, nil +} + +// extractVariables extracts variable definitions from the build file and gradle.properties +func extractVariables(manifestFile, content string) map[string]string { + vars := make(map[string]string) + + // Read gradle.properties if exists + gradlePropsPath := filepath.Join(filepath.Dir(manifestFile), "gradle.properties") + if propsContent, err := os.ReadFile(gradlePropsPath); err == nil { + parsePropertiesInto(string(propsContent), vars) + } + + // Walk up to project root for parent gradle.properties + projectRoot := findProjectRoot(filepath.Dir(manifestFile)) + if projectRoot != filepath.Dir(manifestFile) { + rootPropsPath := filepath.Join(projectRoot, "gradle.properties") + if propsContent, err := os.ReadFile(rootPropsPath); err == nil { + parsePropertiesInto(string(propsContent), vars) + } + } + + // Extract from ext blocks (Groovy) — handle all ext blocks, filter commented lines + extPattern := regexp.MustCompile(`(?s)ext\s*\{([^}]+)\}`) + for _, matches := range extPattern.FindAllStringSubmatch(content, -1) { + if len(matches) > 1 { + // Filter commented lines from ext block content + var filteredLines []string + for _, line := range strings.Split(matches[1], "\n") { + trimmed := strings.TrimSpace(line) + if !strings.HasPrefix(trimmed, "//") && !strings.HasPrefix(trimmed, "*") { + filteredLines = append(filteredLines, line) + } + } + extContent := strings.Join(filteredLines, "\n") + // Simple key = 'value' or key: 'value' + varPatterns := []*regexp.Regexp{ + regexp.MustCompile(`(\w+)\s*=\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(\w+)\s*:\s*['"]([^'"]+)['"]`), + } + for _, pattern := range varPatterns { + for _, match := range pattern.FindAllStringSubmatch(extContent, -1) { + if len(match) > 2 { + vars[match[1]] = match[2] + } + } + } + } + } + + // Extract ext.key = 'value' (outside blocks) + extVarPattern := regexp.MustCompile(`ext\.(\w+)\s*=\s*['"]([^'"]+)['"]`) + for _, match := range extVarPattern.FindAllStringSubmatch(content, -1) { + if len(match) > 2 { + vars[match[1]] = match[2] + } + } + + // Extract Kotlin DSL val/const + kotlinVarPatterns := []*regexp.Regexp{ + regexp.MustCompile(`(?:val|const val)\s+(\w+)\s*=\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?:val|const val)\s+(\w+)\s*=\s*(\d+(?:\.\d+)*[^\s'"]*)`), // for versions without quotes + } + for _, pattern := range kotlinVarPatterns { + for _, match := range pattern.FindAllStringSubmatch(content, -1) { + if len(match) > 2 { + vars[match[1]] = match[2] + } + } + } + + return vars +} + +type dependencyStatement struct { + Line int + Text string + RawLines []rawLineInfo +} + +// rawLineInfo records a single source line that contributes to a dependency statement. +// Content is the raw line with \r stripped (no other trimming) so byte offsets stay accurate. +type rawLineInfo struct { + LineNum int + Content string +} + +// parseDependencies parses dependencies from the content +func parseDependencies(content string, variables map[string]string) []models.Package { + var packages []models.Package + + statements := extractDependencyStatements(content) + for _, stmt := range statements { + locations := computeGradleLocations(stmt.RawLines) + for _, pkg := range parseDependencyStatement(stmt.Text, variables) { + pkg.Locations = locations + packages = append(packages, pkg) + } + } + + return packages +} + +func extractDependencyStatements(content string) []dependencyStatement { + startPattern := regexp.MustCompile(`(?i)\b(` + configKeywords + `)\b`) + var statements []dependencyStatement + var buffer strings.Builder + var rawLines []rawLineInfo + active := false + startLine := 0 + + lines := strings.Split(content, "\n") + // Strip trailing \r so byte offsets are consistent on CRLF files + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } + + for i, raw := range lines { + line := strings.TrimSpace(raw) + if line == "" || strings.HasPrefix(line, "//") || strings.HasPrefix(line, "/*") || strings.HasPrefix(line, "*") { + continue + } + + if !active { + if startPattern.MatchString(line) { + // Skip non-Maven dependency references + if isProjectReference(line) || isFileReference(line) || isVersionCatalogReference(line) { + continue + } + active = true + startLine = i + buffer.Reset() + buffer.WriteString(line) + rawLines = []rawLineInfo{{LineNum: i, Content: raw}} + normalized := normalizePlatformDependency(buffer.String()) + if dependencyStatementComplete(normalized) { + statements = append(statements, dependencyStatement{Line: startLine, Text: normalized, RawLines: rawLines}) + active = false + } + } + continue + } + + buffer.WriteString(" ") + buffer.WriteString(line) + rawLines = append(rawLines, rawLineInfo{LineNum: i, Content: raw}) + normalized := normalizePlatformDependency(buffer.String()) + if dependencyStatementComplete(normalized) { + statements = append(statements, dependencyStatement{Line: startLine, Text: normalized, RawLines: rawLines}) + active = false + } + } + + return statements +} + +// computeGradleLocations emits one Location per contributing source line (Maven-style). +// For each line: StartIndex = offset of first non-whitespace character; EndIndex = end +// of code on the line, with any trailing // ... comment and trailing whitespace stripped. +func computeGradleLocations(rawLines []rawLineInfo) []models.Location { + locations := make([]models.Location, 0, len(rawLines)) + for _, rl := range rawLines { + code := stripInlineComment(rl.Content) + code = strings.TrimRight(code, " \t") + if strings.TrimSpace(code) == "" { + continue + } + startIdx := len(rl.Content) - len(strings.TrimLeft(rl.Content, " \t")) + locations = append(locations, models.Location{ + Line: rl.LineNum, + StartIndex: startIdx, + EndIndex: len(code), + }) + } + if len(locations) == 0 { + return nil + } + return locations +} + +// stripInlineComment removes a trailing `// ...` from a Gradle source line, +// taking quote state into account so // inside a quoted string is preserved. +func stripInlineComment(line string) string { + inSingle := false + inDouble := false + for i := 0; i < len(line)-1; i++ { + ch := line[i] + switch { + case ch == '\\' && (inSingle || inDouble): + i++ // skip escaped char + case ch == '\'' && !inDouble: + inSingle = !inSingle + case ch == '"' && !inSingle: + inDouble = !inDouble + case !inSingle && !inDouble && ch == '/' && line[i+1] == '/': + return line[:i] + } + } + return line +} + +func dependencyStatementComplete(statement string) bool { + kw := configKeywords + patterns := []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"]+)['"]\s*\)`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`), + regexp.MustCompile(`(?i)group\s*[:=]\s*['"]([^'"]+)['"].*name\s*[:=]\s*['"]([^'"]+)['"].*version\s*[:=]\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)group\s*[:=]\s*[^,\s]+.*name\s*[:=]\s*[^,\s]+.*version\s*[:=]\s*[^,\s]+`), + } + + for _, pattern := range patterns { + if pattern.MatchString(statement) { + return true + } + } + + return false +} + +func parseDependencyStatement(statement string, variables map[string]string) []models.Package { + var packages []models.Package + + kw := configKeywords + patterns := []*regexp.Regexp{ + regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"]+)['"]\s*\)`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`), + } + + for _, pattern := range patterns { + matches := pattern.FindStringSubmatch(statement) + if len(matches) > 0 { + var group, name, version string + if len(matches) == 3 { + depStr := resolveVariables(matches[2], variables) + parts := strings.Split(depStr, ":") + if len(parts) >= 2 { + group = parts[0] + name = parts[1] + if len(parts) > 2 { + version = strings.Join(parts[2:], ":") + } + } + } else if len(matches) == 5 { + group = resolveVariables(matches[2], variables) + name = resolveVariables(matches[3], variables) + version = resolveVariables(matches[4], variables) + } + + if group != "" && name != "" { + packages = append(packages, models.Package{ + PackageManager: "gradle", + PackageName: group + ":" + name, + Version: cleanVersion(version), + FilePath: "", + Locations: []models.Location{{}}, + }) + } + } + } + + if len(packages) == 0 { + if pkg := parseDependencyKeyValue(statement, variables); pkg != nil { + packages = append(packages, *pkg) + } + } + + return packages +} + +func parseDependencyKeyValue(statement string, variables map[string]string) *models.Package { + fields := map[string]string{} + + patterns := []*regexp.Regexp{ + regexp.MustCompile(`(?i)(group|name|version)\s*[:=]\s*['"]([^'"]+)['"]`), + regexp.MustCompile(`(?i)(group|name|version)\s*[:=]\s*([A-Za-z_][A-Za-z0-9_]*)`), + } + + for _, pattern := range patterns { + for _, match := range pattern.FindAllStringSubmatch(statement, -1) { + if len(match) > 2 { + key := strings.ToLower(match[1]) + value := match[2] + fields[key] = resolveVariables(value, variables) + } + } + } + + if fields["group"] == "" || fields["name"] == "" { + return nil + } + + return &models.Package{ + PackageManager: "gradle", + PackageName: fields["group"] + ":" + fields["name"], + Version: cleanVersion(fields["version"]), + FilePath: "", + Locations: []models.Location{{}}, + } +} + +// resolveVariables replaces ${var} or $var with values +func resolveVariables(str string, variables map[string]string) string { + // ${var} + re := regexp.MustCompile(`\$\{([^}]+)\}`) + str = re.ReplaceAllStringFunc(str, func(match string) string { + varName := strings.TrimSuffix(strings.TrimPrefix(match, "${"), "}") + if val, ok := variables[varName]; ok { + return val + } + return match + }) + + // $var + re = regexp.MustCompile(`\$(\w+)`) + str = re.ReplaceAllStringFunc(str, func(match string) string { + varName := strings.TrimPrefix(match, "$") + if val, ok := variables[varName]; ok { + return val + } + return match + }) + + return str +} + +// cleanVersion handles version ranges and classifiers +func cleanVersion(version string) string { + if version == "" { + return "latest" + } + // Check for any range or wildcard patterns + if strings.ContainsAny(version, "[]()^~*><") || strings.Contains(version, "+") { + return "latest" + } + // For now, keep classifiers as is + return version +} + +// findLineNumber finds the line number of a substring in content +func findLineNumber(content, substr string) int { + index := strings.Index(content, substr) + if index == -1 { + return 0 + } + return strings.Count(content[:index], "\n") + 1 +} + +// parsePropertiesInto parses key=value properties into the given map (does not overwrite existing keys) +func parsePropertiesInto(content string, vars map[string]string) { + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + if strings.Contains(line, "=") && !strings.HasPrefix(line, "#") { + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 { + key := strings.TrimSpace(parts[0]) + if _, exists := vars[key]; !exists { + vars[key] = strings.TrimSpace(parts[1]) + } + } + } + } +} + +// findProjectRoot walks up from dir looking for settings.gradle or settings.gradle.kts +func findProjectRoot(dir string) string { + current := dir + for { + if _, err := os.Stat(filepath.Join(current, "settings.gradle")); err == nil { + return current + } + if _, err := os.Stat(filepath.Join(current, "settings.gradle.kts")); err == nil { + return current + } + parent := filepath.Dir(current) + if parent == current { + break + } + current = parent + } + return dir +} + +// isProjectReference checks if a dependency statement is a project reference +func isProjectReference(statement string) bool { + pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?project\s*\(`) + return pattern.MatchString(statement) +} + +// isFileReference checks if a dependency statement is a file reference (files/fileTree) +func isFileReference(statement string) bool { + pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?(?:files|fileTree)\s*\(`) + return pattern.MatchString(statement) +} + +// isVersionCatalogReference checks if a dependency uses version catalog syntax (libs.xxx) +func isVersionCatalogReference(statement string) bool { + pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?libs\.`) + return pattern.MatchString(statement) +} + +// normalizePlatformDependency strips platform() and enforcedPlatform() wrappers +func normalizePlatformDependency(statement string) string { + pattern := regexp.MustCompile(`\b(?:platform|enforcedPlatform)\s*\(\s*(['"][^'"]+['"])\s*\)`) + return pattern.ReplaceAllString(statement, "$1") +} diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go new file mode 100644 index 0000000..20a4c1a --- /dev/null +++ b/internal/parsers/gradle/gradle_parser_test.go @@ -0,0 +1,897 @@ +package gradle + +import ( + "os" + "path/filepath" + "testing" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +func TestGradleParser_Parse(t *testing.T) { + tests := []struct { + name string + content string + expectedPkgs []models.Package + expectedError bool + }{ + { + name: "basic gradle file", + content: `plugins { + id 'java' +} + +ext { + springVersion = '5.3.0' +} + +dependencies { + implementation 'org.springframework:spring-core:5.3.0' + testImplementation 'junit:junit:4.13' + api 'com.google.guava:guava:30.1-jre' + implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' +} + +buildscript { + dependencies { + classpath 'com.android.tools.build:gradle:7.0.0' + } +}`, + expectedPkgs: []models.Package{ + { + PackageManager: "gradle", + PackageName: "org.springframework:spring-core", + Version: "5.3.0", + Locations: []models.Location{ + {Line: 9}, + }, + }, + { + PackageManager: "gradle", + PackageName: "junit:junit", + Version: "4.13", + Locations: []models.Location{ + {Line: 10}, + }, + }, + { + PackageManager: "gradle", + PackageName: "com.google.guava:guava", + Version: "30.1-jre", + Locations: []models.Location{ + {Line: 11}, + }, + }, + { + PackageManager: "gradle", + PackageName: "org.apache.commons:commons-lang3", + Version: "3.12.0", + Locations: []models.Location{ + {Line: 12}, + }, + }, + { + PackageManager: "gradle", + PackageName: "com.android.tools.build:gradle", + Version: "7.0.0", + Locations: []models.Location{ + {Line: 17}, + }, + }, + }, + expectedError: false, + }, + { + name: "kotlin dsl dependency syntax", + content: `val kotlinVersion = "1.4.32" + +dependencies { + implementation("org.springframework:spring-core:$kotlinVersion") + implementation( + "org.apache.commons:commons-lang3:3.12.0" + ) + implementation(group = "com.google.guava", name = "guava", version = "30.1-jre") + if (project.hasProperty("feature")) { + testImplementation("junit:junit:$kotlinVersion") + } +}`, + expectedPkgs: []models.Package{ + { + PackageManager: "gradle", + PackageName: "org.springframework:spring-core", + Version: "1.4.32", + Locations: []models.Location{ + {Line: 3}, + }, + }, + { + PackageManager: "gradle", + PackageName: "org.apache.commons:commons-lang3", + Version: "3.12.0", + Locations: []models.Location{ + {Line: 4}, + }, + }, + { + PackageManager: "gradle", + PackageName: "com.google.guava:guava", + Version: "30.1-jre", + Locations: []models.Location{ + {Line: 7}, + }, + }, + { + PackageManager: "gradle", + PackageName: "junit:junit", + Version: "1.4.32", + Locations: []models.Location{ + {Line: 9}, + }, + }, + }, + expectedError: false, + }, + { + name: "multi-line and conditional dependencies", + content: `ext { + featureVersion = '1.0.0' +} + +dependencies { + implementation( + 'org.springframework:spring-core:5.3.0' + ) + implementation group: 'org.apache.commons', + name: 'commons-lang3', + version: '3.12.0' + if (project.hasProperty('feature')) { + testImplementation 'junit:junit:$featureVersion' + } + if (useRedux) { + api group: 'com.google.guava', + name: 'guava', + version: '30.1-jre' + } +}`, + expectedPkgs: []models.Package{ + { + PackageManager: "gradle", + PackageName: "org.springframework:spring-core", + Version: "5.3.0", + Locations: []models.Location{ + {Line: 5}, + }, + }, + { + PackageManager: "gradle", + PackageName: "org.apache.commons:commons-lang3", + Version: "3.12.0", + Locations: []models.Location{ + {Line: 8}, + }, + }, + { + PackageManager: "gradle", + PackageName: "junit:junit", + Version: "1.0.0", + Locations: []models.Location{ + {Line: 12}, + }, + }, + { + PackageManager: "gradle", + PackageName: "com.google.guava:guava", + Version: "30.1-jre", + Locations: []models.Location{ + {Line: 15}, + }, + }, + }, + expectedError: false, + }, + { + name: "gradle with version ranges", + content: `dependencies { + implementation 'org.springframework:spring-core:[1.0.0,2.0.0)' + implementation 'org.junit:junit:(1.0,2.0]' +}`, + expectedPkgs: []models.Package{ + { + PackageManager: "gradle", + PackageName: "org.springframework:spring-core", + Version: "latest", + Locations: []models.Location{ + {Line: 1}, + }, + }, + { + PackageManager: "gradle", + PackageName: "org.junit:junit", + Version: "latest", + Locations: []models.Location{ + {Line: 2}, + }, + }, + }, + expectedError: false, + }, + { + name: "gradle with prefix wildcards", + content: `dependencies { + implementation 'org.springframework:spring-core:1.0.+' + implementation 'org.junit:junit:4.12.*' +}`, + expectedPkgs: []models.Package{ + { + PackageManager: "gradle", + PackageName: "org.springframework:spring-core", + Version: "latest", + Locations: []models.Location{ + {Line: 1}, + }, + }, + { + PackageManager: "gradle", + PackageName: "org.junit:junit", + Version: "latest", + Locations: []models.Location{ + {Line: 2}, + }, + }, + }, + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a temporary file + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + // Write content to temp file + _, err = tmpFile.WriteString(tt.content) + if err != nil { + t.Fatalf("Failed to write to temp file: %v", err) + } + tmpFile.Close() + + // Parse the file + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + + if tt.expectedError && err == nil { + t.Errorf("Expected error but got none") + } + if !tt.expectedError && err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if len(pkgs) != len(tt.expectedPkgs) { + t.Errorf("Expected %d packages, got %d", len(tt.expectedPkgs), len(pkgs)) + } + + for i, pkg := range pkgs { + if i >= len(tt.expectedPkgs) { + break + } + expected := tt.expectedPkgs[i] + if pkg.PackageManager != expected.PackageManager || + pkg.PackageName != expected.PackageName || + pkg.Version != expected.Version { + t.Errorf("Package %d mismatch: got %+v, expected %+v", i, pkg, expected) + } + if len(pkg.Locations) > 0 && len(expected.Locations) > 0 { + if pkg.Locations[0].Line != expected.Locations[0].Line { + t.Errorf("Location line mismatch: got %d, expected %d", pkg.Locations[0].Line, expected.Locations[0].Line) + } + } + } + }) + } +} + +func TestGradleParser_ParseFile(t *testing.T) { + // Test with actual file + parser := &GradleParser{} + pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle")) + if err != nil { + t.Fatalf("Failed to parse build.gradle: %v", err) + } + + if len(pkgs) == 0 { + t.Errorf("Expected packages, got none") + } + + for _, pkg := range pkgs { + if pkg.PackageManager != "gradle" { + t.Errorf("Expected package manager 'gradle', got '%s'", pkg.PackageManager) + } + if pkg.PackageName == "" { + t.Errorf("Package name is empty") + } + if pkg.Version == "" { + t.Errorf("Version is empty for %s", pkg.PackageName) + } + } +} + +func TestGradleParser_ParseFile_NoProjectReferences(t *testing.T) { + parser := &GradleParser{} + pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle")) + if err != nil { + t.Fatalf("Failed to parse build.gradle: %v", err) + } + + for _, pkg := range pkgs { + if pkg.PackageName == ":core" || pkg.PackageName == ":app" || pkg.PackageName == ":security" { + t.Errorf("Project reference should not be extracted as a package: %s", pkg.PackageName) + } + } +} + +func TestGradleParser_ParseFile_VariableResolution(t *testing.T) { + parser := &GradleParser{} + pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle")) + if err != nil { + t.Fatalf("Failed to parse build.gradle: %v", err) + } + + for _, pkg := range pkgs { + if pkg.PackageName == "org.springframework.boot:spring-boot-starter-web" { + if pkg.Version != "2.5.0" { + t.Errorf("Expected spring-boot-starter-web version '2.5.0', got '%s'", pkg.Version) + } + return + } + } + t.Errorf("Expected to find org.springframework.boot:spring-boot-starter-web in packages") +} + +func TestGradleParser_ProjectReferencesSkipped(t *testing.T) { + content := `dependencies { + implementation project(':core') + implementation(project(':lib')) + implementation 'org.apache.commons:commons-lang3:3.8' + api project(":shared") +}` + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(content) + tmpFile.Close() + + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if len(pkgs) != 1 { + t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs) + } + if pkgs[0].PackageName != "org.apache.commons:commons-lang3" { + t.Errorf("Expected commons-lang3, got %s", pkgs[0].PackageName) + } +} + +func TestGradleParser_PlatformDependencies(t *testing.T) { + content := `dependencies { + implementation platform('org.springframework.boot:spring-boot-dependencies:2.5.0') + implementation enforcedPlatform('com.google.cloud:libraries-bom:26.1.0') + implementation(platform("org.junit:junit-bom:5.9.0")) + implementation 'org.springframework:spring-core:5.3.0' +}` + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(content) + tmpFile.Close() + + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + expectedPkgs := map[string]string{ + "org.springframework.boot:spring-boot-dependencies": "2.5.0", + "com.google.cloud:libraries-bom": "26.1.0", + "org.junit:junit-bom": "5.9.0", + "org.springframework:spring-core": "5.3.0", + } + + if len(pkgs) != len(expectedPkgs) { + t.Fatalf("Expected %d packages, got %d: %+v", len(expectedPkgs), len(pkgs), pkgs) + } + + for _, pkg := range pkgs { + expectedVersion, ok := expectedPkgs[pkg.PackageName] + if !ok { + t.Errorf("Unexpected package: %s", pkg.PackageName) + continue + } + if pkg.Version != expectedVersion { + t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version) + } + } +} + +func TestGradleParser_FileReferencesSkipped(t *testing.T) { + content := `dependencies { + implementation files('libs/local.jar') + implementation fileTree(dir: 'libs', include: ['*.jar']) + implementation 'org.apache.commons:commons-lang3:3.8' +}` + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(content) + tmpFile.Close() + + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if len(pkgs) != 1 { + t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs) + } + if pkgs[0].PackageName != "org.apache.commons:commons-lang3" { + t.Errorf("Expected commons-lang3, got %s", pkgs[0].PackageName) + } +} + +func TestGradleParser_ExtendedConfigurations(t *testing.T) { + content := `dependencies { + debugImplementation 'com.facebook.stetho:stetho:1.6.0' + releaseImplementation 'com.google.firebase:firebase-crashlytics:18.0.0' + ksp 'com.google.dagger:dagger-compiler:2.44' + compileOnlyApi 'org.projectlombok:lombok:1.18.24' + testCompileOnly 'org.mockito:mockito-core:4.0.0' + lintChecks 'com.android.tools.lint:lint-checks:30.0.0' +}` + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(content) + tmpFile.Close() + + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + expectedNames := []string{ + "com.facebook.stetho:stetho", + "com.google.firebase:firebase-crashlytics", + "com.google.dagger:dagger-compiler", + "org.projectlombok:lombok", + "org.mockito:mockito-core", + "com.android.tools.lint:lint-checks", + } + + if len(pkgs) != len(expectedNames) { + t.Fatalf("Expected %d packages, got %d: %+v", len(expectedNames), len(pkgs), pkgs) + } + + for i, pkg := range pkgs { + if pkg.PackageName != expectedNames[i] { + t.Errorf("Package %d: expected %s, got %s", i, expectedNames[i], pkg.PackageName) + } + } +} + +func TestGradleParser_CommentedExtBlocksIgnored(t *testing.T) { + content := ` +// ext { +// badVar = '0.0.0' +// } + +ext { + goodVar = '1.0.0' +} + +dependencies { + implementation "org.example:lib:$goodVar" +}` + tmpFile, err := os.CreateTemp("", "build.gradle") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(content) + tmpFile.Close() + + parser := &GradleParser{} + pkgs, err := parser.Parse(tmpFile.Name()) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if len(pkgs) != 1 { + t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs) + } + if pkgs[0].Version != "1.0.0" { + t.Errorf("Expected version '1.0.0' from non-commented ext block, got '%s'", pkgs[0].Version) + } +} + +func TestGradleParser_ParentGradleProperties(t *testing.T) { + // Create a directory structure: parent/child/ + parentDir, err := os.MkdirTemp("", "gradle-parent") + if err != nil { + t.Fatalf("Failed to create parent dir: %v", err) + } + defer os.RemoveAll(parentDir) + + childDir := filepath.Join(parentDir, "child") + os.Mkdir(childDir, 0755) + + // Create settings.gradle in parent to mark it as project root + os.WriteFile(filepath.Join(parentDir, "settings.gradle"), []byte("include ':child'"), 0644) + + // Create parent gradle.properties + os.WriteFile(filepath.Join(parentDir, "gradle.properties"), []byte("parentVersion=3.0.0\nsharedVersion=1.0.0"), 0644) + + // Create child gradle.properties (overrides sharedVersion) + os.WriteFile(filepath.Join(childDir, "gradle.properties"), []byte("sharedVersion=2.0.0"), 0644) + + // Create child build.gradle + buildContent := `dependencies { + implementation "org.example:parent-lib:$parentVersion" + implementation "org.example:shared-lib:$sharedVersion" +}` + buildFile := filepath.Join(childDir, "build.gradle") + os.WriteFile(buildFile, []byte(buildContent), 0644) + + parser := &GradleParser{} + pkgs, err := parser.Parse(buildFile) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if len(pkgs) != 2 { + t.Fatalf("Expected 2 packages, got %d: %+v", len(pkgs), pkgs) + } + + // Parent property should be resolved + if pkgs[0].Version != "3.0.0" { + t.Errorf("Expected parent-lib version '3.0.0', got '%s'", pkgs[0].Version) + } + // Child property should take precedence over parent + if pkgs[1].Version != "2.0.0" { + t.Errorf("Expected shared-lib version '2.0.0' (child overrides parent), got '%s'", pkgs[1].Version) + } +} + +func TestVersionCatalog_Parse(t *testing.T) { + catalogContent := `[versions] +spring = "5.3.0" +guava = "30.1-jre" + +[libraries] +spring-core = { module = "org.springframework:spring-core", version.ref = "spring" } +spring-web = { module = "org.springframework:spring-web", version = "5.2.0" } +guava = "com.google.guava:guava:30.1-jre" +commons = { group = "org.apache.commons", name = "commons-lang3", version.ref = "spring" } +` + tmpFile, err := os.CreateTemp("", "libs.versions.toml") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + tmpFile.WriteString(catalogContent) + tmpFile.Close() + + catalog := parseVersionCatalog(tmpFile.Name()) + if catalog == nil { + t.Fatalf("Failed to parse version catalog") + } + + // Check versions + if catalog.Versions["spring"] != "5.3.0" { + t.Errorf("Expected spring version '5.3.0', got '%s'", catalog.Versions["spring"]) + } + if catalog.Versions["guava"] != "30.1-jre" { + t.Errorf("Expected guava version '30.1-jre', got '%s'", catalog.Versions["guava"]) + } + + // Check libraries + tests := []struct { + key string + group string + name string + version string + }{ + {"spring-core", "org.springframework", "spring-core", "5.3.0"}, + {"spring-web", "org.springframework", "spring-web", "5.2.0"}, + {"guava", "com.google.guava", "guava", "30.1-jre"}, + {"commons", "org.apache.commons", "commons-lang3", "5.3.0"}, + } + + for _, tt := range tests { + lib, ok := catalog.Libraries[tt.key] + if !ok { + t.Errorf("Library '%s' not found in catalog", tt.key) + continue + } + if lib.Group != tt.group { + t.Errorf("Library '%s': expected group '%s', got '%s'", tt.key, tt.group, lib.Group) + } + if lib.Name != tt.name { + t.Errorf("Library '%s': expected name '%s', got '%s'", tt.key, tt.name, lib.Name) + } + if lib.Version != tt.version { + t.Errorf("Library '%s': expected version '%s', got '%s'", tt.key, tt.version, lib.Version) + } + } +} + +func TestVersionCatalog_DependencyResolution(t *testing.T) { + // Create directory structure with version catalog + projectDir, err := os.MkdirTemp("", "gradle-catalog") + if err != nil { + t.Fatalf("Failed to create project dir: %v", err) + } + defer os.RemoveAll(projectDir) + + gradleDir := filepath.Join(projectDir, "gradle") + os.Mkdir(gradleDir, 0755) + + // Create settings.gradle to mark project root + os.WriteFile(filepath.Join(projectDir, "settings.gradle"), []byte(""), 0644) + + // Create version catalog + catalogContent := `[versions] +spring = "5.3.0" + +[libraries] +spring-core = { module = "org.springframework:spring-core", version.ref = "spring" } +guava = "com.google.guava:guava:30.1-jre" +` + os.WriteFile(filepath.Join(gradleDir, "libs.versions.toml"), []byte(catalogContent), 0644) + + // Create build.gradle with catalog references + buildContent := `dependencies { + implementation libs.spring.core + implementation(libs.guava) + implementation 'org.direct:dependency:1.0.0' +}` + buildFile := filepath.Join(projectDir, "build.gradle") + os.WriteFile(buildFile, []byte(buildContent), 0644) + + parser := &GradleParser{} + pkgs, err := parser.Parse(buildFile) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + expectedPkgs := map[string]string{ + "org.direct:dependency": "1.0.0", + "org.springframework:spring-core": "5.3.0", + "com.google.guava:guava": "30.1-jre", + } + + if len(pkgs) != len(expectedPkgs) { + t.Fatalf("Expected %d packages, got %d: %+v", len(expectedPkgs), len(pkgs), pkgs) + } + + for _, pkg := range pkgs { + expectedVersion, ok := expectedPkgs[pkg.PackageName] + if !ok { + t.Errorf("Unexpected package: %s", pkg.PackageName) + continue + } + if pkg.Version != expectedVersion { + t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version) + } + } +} + +func TestIsProjectReference(t *testing.T) { + tests := []struct { + input string + expected bool + }{ + {"implementation project(':core')", true}, + {"implementation(project(':core'))", true}, + {`implementation project(":core")`, true}, + {"api project(':shared')", true}, + {"implementation 'org.example:lib:1.0'", false}, + {`implementation("org.example:lib:1.0")`, false}, + } + + for _, tt := range tests { + result := isProjectReference(tt.input) + if result != tt.expected { + t.Errorf("isProjectReference(%q) = %v, want %v", tt.input, result, tt.expected) + } + } +} + +func TestIsFileReference(t *testing.T) { + tests := []struct { + input string + expected bool + }{ + {"implementation files('libs/local.jar')", true}, + {"implementation fileTree(dir: 'libs', include: ['*.jar'])", true}, + {"implementation(files('libs/local.jar'))", true}, + {"implementation 'org.example:lib:1.0'", false}, + } + + for _, tt := range tests { + result := isFileReference(tt.input) + if result != tt.expected { + t.Errorf("isFileReference(%q) = %v, want %v", tt.input, result, tt.expected) + } + } +} + +func TestNormalizePlatformDependency(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + "implementation platform('org.springframework.boot:spring-boot-dependencies:2.5.0')", + "implementation 'org.springframework.boot:spring-boot-dependencies:2.5.0'", + }, + { + "implementation enforcedPlatform('com.google.cloud:libraries-bom:26.1.0')", + "implementation 'com.google.cloud:libraries-bom:26.1.0'", + }, + { + `implementation(platform("org.junit:junit-bom:5.9.0"))`, + `implementation("org.junit:junit-bom:5.9.0")`, + }, + { + "implementation 'org.example:lib:1.0'", + "implementation 'org.example:lib:1.0'", + }, + } + + for _, tt := range tests { + result := normalizePlatformDependency(tt.input) + if result != tt.expected { + t.Errorf("normalizePlatformDependency(%q) = %q, want %q", tt.input, result, tt.expected) + } + } +} + +func TestVersionCatalogParser_ParseFile(t *testing.T) { + // Test parsing libs.versions.toml directly + parser := &VersionCatalogParser{} + pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "gradle", "libs.versions.toml")) + if err != nil { + t.Fatalf("Failed to parse libs.versions.toml: %v", err) + } + + if len(pkgs) == 0 { + t.Errorf("Expected packages from version catalog, got none") + } + + // Verify expected packages are present + expectedPackages := map[string]string{ + "org.springframework:spring-core": "5.3.20", + "org.springframework.boot:spring-boot-starter-web": "2.7.0", + "com.google.guava:guava": "31.1-jre", + "org.apache.logging.log4j:log4j-core": "2.17.1", + } + + found := make(map[string]bool) + for _, pkg := range pkgs { + if expectedVersion, ok := expectedPackages[pkg.PackageName]; ok { + found[pkg.PackageName] = true + if pkg.Version != expectedVersion { + t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version) + } + if pkg.PackageManager != "gradle" { + t.Errorf("Expected package manager 'gradle', got '%s'", pkg.PackageManager) + } + } + } + + for pkgName := range expectedPackages { + if !found[pkgName] { + t.Errorf("Expected package not found: %s", pkgName) + } + } +} + +// TestGradleParser_LocationIndices asserts that the Gradle parser populates +// StartIndex and EndIndex on each Location, not just Line. +func TestGradleParser_LocationIndices(t *testing.T) { + parser := &GradleParser{} + pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle")) + if err != nil { + t.Fatalf("Failed to parse build.gradle: %v", err) + } + + // build.gradle line 40 (1-based): + // implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // Log4Shell + // 8 spaces + "implementation 'org.apache.logging.log4j:log4j-core:2.14.0'" (= 8 + 59 = 67) + cases := map[string]struct { + line, startIdx, endIdx int + }{ + "org.apache.logging.log4j:log4j-core": {39, 8, 67}, + "commons-collections:commons-collections": {40, 8, 70}, + "org.springframework:spring-web": {45, 8, 69}, + } + + for _, pkg := range pkgs { + want, ok := cases[pkg.PackageName] + if !ok { + continue + } + if len(pkg.Locations) == 0 { + t.Errorf("%s: no Locations", pkg.PackageName) + continue + } + got := pkg.Locations[0] + if got.Line != want.line || got.StartIndex != want.startIdx || got.EndIndex != want.endIdx { + t.Errorf("%s: got Location{Line=%d, Start=%d, End=%d}, want {Line=%d, Start=%d, End=%d}", + pkg.PackageName, got.Line, got.StartIndex, got.EndIndex, want.line, want.startIdx, want.endIdx) + } + } +} + +// TestComputeGradleLocations_MultiLine asserts that a dependency spanning multiple +// source lines produces one Location per non-empty contributing line (Maven-style). +func TestComputeGradleLocations_MultiLine(t *testing.T) { + raws := []rawLineInfo{ + {LineNum: 5, Content: " implementation("}, + {LineNum: 6, Content: " \"org.springframework:spring-core:5.3.0\""}, + {LineNum: 7, Content: " )"}, + } + locs := computeGradleLocations(raws) + if len(locs) != 3 { + t.Fatalf("expected 3 Locations, got %d", len(locs)) + } + want := []models.Location{ + {Line: 5, StartIndex: 4, EndIndex: 19}, // " implementation(" length 19 + {Line: 6, StartIndex: 8, EndIndex: 47}, // 8 spaces + "\"org.springframework:spring-core:5.3.0\"" (39) = 47 + {Line: 7, StartIndex: 4, EndIndex: 5}, // " )" length 5 + } + for i, w := range want { + if locs[i] != w { + t.Errorf("loc[%d]: got %+v, want %+v", i, locs[i], w) + } + } +} + +// TestStripInlineComment verifies trailing // comments are removed but // inside +// strings is preserved. +func TestStripInlineComment(t *testing.T) { + cases := []struct{ in, out string }{ + {"implementation 'foo:bar:1.0' // comment", "implementation 'foo:bar:1.0' "}, + {`implementation "https://example.com"`, `implementation "https://example.com"`}, + {"no comment here", "no comment here"}, + {"// whole line is a comment", ""}, + } + for _, c := range cases { + if got := stripInlineComment(c.in); got != c.out { + t.Errorf("stripInlineComment(%q) = %q, want %q", c.in, got, c.out) + } + } +} diff --git a/internal/parsers/gradle/version_catalog.go b/internal/parsers/gradle/version_catalog.go new file mode 100644 index 0000000..05db468 --- /dev/null +++ b/internal/parsers/gradle/version_catalog.go @@ -0,0 +1,285 @@ +package gradle + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +// VersionCatalogParser implements parsing of Gradle version catalogs (libs.versions.toml) +type VersionCatalogParser struct{} + +// Parse implements the Parser interface for version catalog files +func (p *VersionCatalogParser) Parse(manifestFile string) ([]models.Package, error) { + catalog := parseVersionCatalog(manifestFile) + if catalog == nil { + return nil, fmt.Errorf("failed to parse version catalog: %w", fmt.Errorf("invalid TOML format")) + } + + var packages []models.Package + + // Convert catalog libraries to packages + for _, lib := range catalog.Libraries { + if lib.Group != "" && lib.Name != "" { + version := lib.Version + if version == "" { + version = "latest" + } + packages = append(packages, models.Package{ + PackageManager: "gradle", + PackageName: lib.Group + ":" + lib.Name, + Version: version, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: lib.Line, + StartIndex: lib.StartIndex, + EndIndex: lib.EndIndex, + }}, + }) + } + } + + return packages, nil +} + +// VersionCatalog represents a parsed Gradle version catalog (libs.versions.toml) +type VersionCatalog struct { + Versions map[string]string + Libraries map[string]CatalogLibrary +} + +// CatalogLibrary represents a library entry in the version catalog +type CatalogLibrary struct { + Group string + Name string + Version string + Line int // 0-based line number in the TOML file + StartIndex int // offset of first non-whitespace character on the line + EndIndex int // offset just past the last non-whitespace character on the line +} + +// findVersionCatalog locates gradle/libs.versions.toml relative to the project root +func findVersionCatalog(manifestFile string) string { + projectRoot := findProjectRoot(filepath.Dir(manifestFile)) + catalogPath := filepath.Join(projectRoot, "gradle", "libs.versions.toml") + if _, err := os.Stat(catalogPath); err == nil { + return catalogPath + } + return "" +} + +// parseVersionCatalog reads and parses a libs.versions.toml file +func parseVersionCatalog(path string) *VersionCatalog { + content, err := os.ReadFile(path) + if err != nil { + return nil + } + + catalog := &VersionCatalog{ + Versions: make(map[string]string), + Libraries: make(map[string]CatalogLibrary), + } + + lines := strings.Split(string(content), "\n") + // Strip trailing \r so byte offsets are consistent on CRLF files + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } + currentSection := "" + + sectionPattern := regexp.MustCompile(`^\s*\[(\w+)\]\s*$`) + simpleKV := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`) + + for lineIdx, raw := range lines { + trimmed := strings.TrimSpace(raw) + if trimmed == "" || strings.HasPrefix(trimmed, "#") { + continue + } + + if match := sectionPattern.FindStringSubmatch(trimmed); len(match) > 1 { + currentSection = match[1] + continue + } + + switch currentSection { + case "versions": + if match := simpleKV.FindStringSubmatch(trimmed); len(match) > 2 { + catalog.Versions[match[1]] = match[2] + } + case "libraries": + parseCatalogLibraryEntry(trimmed, raw, lineIdx, catalog) + } + } + + // Resolve version.ref references + for key, lib := range catalog.Libraries { + if strings.HasPrefix(lib.Version, "ref:") { + refName := strings.TrimPrefix(lib.Version, "ref:") + if resolved, ok := catalog.Versions[refName]; ok { + lib.Version = resolved + catalog.Libraries[key] = lib + } + } + } + + return catalog +} + +// parseCatalogLibraryEntry parses a single library line from the version catalog. +// trimmed is the whitespace-stripped line content used for regex matching; +// raw is the original line used to compute byte offsets for Location indices. +func parseCatalogLibraryEntry(trimmed, raw string, lineIdx int, catalog *VersionCatalog) { + startIdx, endIdx := lineExtent(raw) + + // Pattern: key = "group:name:version" + simplePattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`) + if match := simplePattern.FindStringSubmatch(trimmed); len(match) > 2 { + parts := strings.Split(match[2], ":") + if len(parts) >= 2 { + lib := CatalogLibrary{ + Group: parts[0], + Name: parts[1], + Line: lineIdx, + StartIndex: startIdx, + EndIndex: endIdx, + } + if len(parts) >= 3 { + lib.Version = parts[2] + } + catalog.Libraries[match[1]] = lib + return + } + } + + // Pattern: key = { module = "group:name", version.ref = "xxx" } + // Pattern: key = { module = "group:name", version = "xxx" } + // Pattern: key = { group = "g", name = "n", version.ref = "xxx" } + // Pattern: key = { group = "g", name = "n", version = "xxx" } + kvPattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*\{(.+)\}\s*$`) + if match := kvPattern.FindStringSubmatch(trimmed); len(match) > 2 { + key := match[1] + body := match[2] + + lib := CatalogLibrary{} + + // Extract module = "group:name" + modulePattern := regexp.MustCompile(`module\s*=\s*"([^"]+)"`) + if m := modulePattern.FindStringSubmatch(body); len(m) > 1 { + parts := strings.Split(m[1], ":") + if len(parts) >= 2 { + lib.Group = parts[0] + lib.Name = parts[1] + } + } + + // Extract group/name separately + groupPattern := regexp.MustCompile(`group\s*=\s*"([^"]+)"`) + namePattern := regexp.MustCompile(`name\s*=\s*"([^"]+)"`) + if m := groupPattern.FindStringSubmatch(body); len(m) > 1 { + lib.Group = m[1] + } + if m := namePattern.FindStringSubmatch(body); len(m) > 1 { + lib.Name = m[1] + } + + // Extract version.ref or version + versionRefPattern := regexp.MustCompile(`version\.ref\s*=\s*"([^"]+)"`) + versionPattern := regexp.MustCompile(`(?:^|[^.])version\s*=\s*"([^"]+)"`) + if m := versionRefPattern.FindStringSubmatch(body); len(m) > 1 { + lib.Version = "ref:" + m[1] + } else if m := versionPattern.FindStringSubmatch(body); len(m) > 1 { + lib.Version = m[1] + } + + if lib.Group != "" && lib.Name != "" { + lib.Line = lineIdx + lib.StartIndex = startIdx + lib.EndIndex = endIdx + catalog.Libraries[key] = lib + } + } +} + +// lineExtent returns the offset of the first non-whitespace char and the offset +// just past the last non-whitespace char on the line. +func lineExtent(line string) (int, int) { + startIdx := len(line) - len(strings.TrimLeft(line, " \t")) + endIdx := len(strings.TrimRight(line, " \t")) + return startIdx, endIdx +} + +// catalogKeyToDependency resolves a version catalog accessor (e.g., "spring.core") +// to a library entry. In Gradle, dots in the accessor map to dashes in catalog keys. +func catalogKeyToDependency(ref string, catalog *VersionCatalog) *CatalogLibrary { + if catalog == nil { + return nil + } + + // In Gradle, dots in accessor map to dashes in catalog keys + // e.g., libs.spring.core -> spring-core + catalogKey := strings.ReplaceAll(ref, ".", "-") + + if lib, ok := catalog.Libraries[catalogKey]; ok { + return &lib + } + + return nil +} + +// parseVersionCatalogDependencies extracts dependencies from version catalog references in content +func parseVersionCatalogDependencies(content string, catalog *VersionCatalog) []models.Package { + if catalog == nil { + return nil + } + + var packages []models.Package + + // Match patterns like: + // implementation(libs.spring.core) + // implementation libs.spring.core + configPattern := `(?i)\b(` + configKeywords + `)\s*(?:\(\s*)?libs\.([a-zA-Z0-9.]+)\s*\)?` + pattern := regexp.MustCompile(configPattern) + + lines := strings.Split(content, "\n") + // Strip trailing \r so byte offsets are consistent on CRLF files + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } + for i, raw := range lines { + trimmed := strings.TrimSpace(raw) + if trimmed == "" || strings.HasPrefix(trimmed, "//") { + continue + } + + matches := pattern.FindAllStringSubmatch(trimmed, -1) + for _, match := range matches { + if len(match) > 2 { + ref := match[2] + lib := catalogKeyToDependency(ref, catalog) + if lib != nil && lib.Group != "" && lib.Name != "" { + startIdx, endIdx := lineExtent(stripInlineComment(raw)) + version := lib.Version + if version == "" { + version = "latest" + } + packages = append(packages, models.Package{ + PackageManager: "gradle", + PackageName: lib.Group + ":" + lib.Name, + Version: version, + Locations: []models.Location{{ + Line: i, + StartIndex: startIdx, + EndIndex: endIdx, + }}, + }) + } + } + } + } + + return packages +} diff --git a/internal/parsers/maven/maven-pom-parser.go b/internal/parsers/maven/maven-pom-parser.go index 45af00a..10dd625 100644 --- a/internal/parsers/maven/maven-pom-parser.go +++ b/internal/parsers/maven/maven-pom-parser.go @@ -209,7 +209,11 @@ func (p *MavenPomParser) Parse(manifestFile string) ([]models.Package, error) { } var packages []models.Package + // Strip \r for CRLF files so len(line) is correct on Windows lines := strings.Split(string(content), "\n") + for i := range lines { + lines[i] = strings.TrimRight(lines[i], "\r") + } // Process only direct dependencies (not managed ones to avoid duplicates) allDeps := project.Dependencies diff --git a/internal/parsers/poetry/poetry-pyproject-parser.go b/internal/parsers/poetry/poetry-pyproject-parser.go new file mode 100644 index 0000000..8032363 --- /dev/null +++ b/internal/parsers/poetry/poetry-pyproject-parser.go @@ -0,0 +1,326 @@ +package poetry + +import ( + "bufio" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +// PoetryPyprojectParser parses pyproject.toml Poetry dependency sections. +type PoetryPyprojectParser struct{} + +var ( + groupDepSectionRe = regexp.MustCompile(`^\[tool\.poetry\.group\.[^.]+\.dependencies\]$`) + inlineTableVersionRe = regexp.MustCompile(`version\s*=\s*"([^"]*)"`) + pep621OptDepSectionRe = regexp.MustCompile(`^\[project\.optional-dependencies\]$`) +) + +func isPoetryDepsSection(line string) bool { + return line == "[tool.poetry.dependencies]" || + line == "[tool.poetry.dev-dependencies]" || + groupDepSectionRe.MatchString(line) +} + +func parsePoetryVersion(v string) string { + v = strings.TrimSpace(v) + if v == "" { + return "latest" + } + if strings.Contains(v, "*") { + return "latest" + } + for _, op := range []string{"^", "~", ">", "<", ",", "!", "=", ";", "~="} { + if strings.Contains(v, op) { + return "latest" + } + } + return v +} + +func pyprojectLineIndices(raw, pkgName string) (int, int) { + startIdx := strings.Index(raw, pkgName) + if startIdx < 0 { + startIdx = 0 + } + endIdx := len(raw) + if commentIdx := strings.Index(raw, "#"); commentIdx >= 0 { + endIdx = commentIdx + } + endIdx = strings.LastIndexFunc(raw[:endIdx], func(r rune) bool { + return r != ' ' && r != '\t' + }) + 1 + return startIdx, endIdx +} + +func parsePyprojectDepLine(line string) (name, version string, ok bool) { + eqIdx := strings.Index(line, " = ") + if eqIdx < 0 { + return "", "", false + } + name = strings.TrimSpace(line[:eqIdx]) + if name == "" || name == "python" { + return "", "", false + } + + valueStr := strings.TrimSpace(line[eqIdx+3:]) + + if strings.HasPrefix(valueStr, "{") { + if m := inlineTableVersionRe.FindStringSubmatch(valueStr); m != nil { + version = m[1] + } else { + version = "latest" + } + } else if len(valueStr) >= 2 && valueStr[0] == '"' && valueStr[len(valueStr)-1] == '"' { + version = valueStr[1 : len(valueStr)-1] + } else { + version = "latest" + } + + return name, version, true +} + +// parseLockFile reads poetry.lock and returns a map of package name to version +func parseLockFile(manifestDir string) map[string]string { + lockVersions := make(map[string]string) + + lockPath := filepath.Join(manifestDir, "poetry.lock") + + file, err := os.Open(lockPath) + if err != nil { + return lockVersions + } + defer file.Close() + + scanner := bufio.NewScanner(file) + var currentPackageName string + + for scanner.Scan() { + line := scanner.Text() + trimmed := strings.TrimSpace(line) + + if strings.HasPrefix(trimmed, "[[package]]") { + currentPackageName = "" + continue + } + + if strings.HasPrefix(trimmed, "[") && !strings.HasPrefix(trimmed, "[[") { + currentPackageName = "" + continue + } + + if strings.HasPrefix(trimmed, "name = ") { + currentPackageName = strings.TrimSpace(strings.TrimPrefix(trimmed, "name = ")) + currentPackageName = strings.Trim(currentPackageName, "\"") + currentPackageName = strings.ToLower(currentPackageName) + continue + } + + if currentPackageName != "" && strings.HasPrefix(trimmed, "version = ") { + version := strings.TrimSpace(strings.TrimPrefix(trimmed, "version = ")) + version = strings.Trim(version, "\"") + lockVersions[currentPackageName] = version + currentPackageName = "" + continue + } + } + + return lockVersions +} + +// resolveVersionWithLock resolves version using poetry.lock if available +func resolveVersionWithLock(pkgName, version string, lockVersions map[string]string) string { + if !strings.ContainsAny(version, "^~><,!=;*") { + return version + } + + if strings.HasPrefix(version, "==") { + return strings.TrimSpace(version[2:]) + } + + if lockVersion, found := lockVersions[strings.ToLower(pkgName)]; found { + return lockVersion + } + + return "latest" +} + +func parsePep621Requirement(req string) (name, version string, ok bool) { + req = strings.TrimSpace(req) + if strings.HasPrefix(req, "\"") { + req = strings.TrimPrefix(req, "\"") + } + if strings.HasSuffix(req, "\",") { + req = strings.TrimSuffix(req, "\",") + } else if strings.HasSuffix(req, "\"") { + req = strings.TrimSuffix(req, "\"") + } + if strings.HasSuffix(req, ",") { + req = strings.TrimSuffix(req, ",") + } + req = strings.TrimSpace(req) + if req == "" { + return "", "", false + } + + for _, sep := range []string{"==", ">=", "<=", "~=", "!=", ">", "<", ";"} { + if idx := strings.Index(req, sep); idx >= 0 { + name = strings.TrimSpace(req[:idx]) + versionPart := strings.TrimSpace(req[idx+len(sep):]) + if idx2 := strings.Index(versionPart, ";"); idx2 >= 0 { + versionPart = strings.TrimSpace(versionPart[:idx2]) + } + version = sep + versionPart + return name, version, name != "" + } + } + + name = strings.TrimSpace(req) + return name, "latest", name != "" +} + +func (p *PoetryPyprojectParser) Parse(manifestFile string) ([]models.Package, error) { + file, err := os.Open(manifestFile) + if err != nil { + return nil, err + } + defer file.Close() + + manifestDir := filepath.Dir(manifestFile) + lockVersions := parseLockFile(manifestDir) + + var packages []models.Package + scanner := bufio.NewScanner(file) + lineNum := 0 + inPoetryDepsSection := false + inPep621Section := false + inPep621Array := false + skipUntilCloseBrace := false + + for scanner.Scan() { + raw := scanner.Text() + trimmed := strings.TrimSpace(raw) + + if skipUntilCloseBrace { + if strings.Contains(trimmed, "}") { + skipUntilCloseBrace = false + } + lineNum++ + continue + } + + if strings.HasPrefix(trimmed, "[") { + inPoetryDepsSection = isPoetryDepsSection(trimmed) + inPep621Section = trimmed == "[project]" || pep621OptDepSectionRe.MatchString(trimmed) + inPep621Array = false + lineNum++ + continue + } + + if inPep621Array { + if strings.TrimSpace(trimmed) == "]" { + inPep621Array = false + lineNum++ + continue + } + + name, version, ok := parsePep621Requirement(trimmed) + if ok { + resolvedVersion := resolveVersionWithLock(name, version, lockVersions) + startIdx, endIdx := pyprojectLineIndices(raw, name) + packages = append(packages, models.Package{ + PackageManager: "pypi", + PackageName: name, + Version: resolvedVersion, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: lineNum, + StartIndex: startIdx, + EndIndex: endIdx, + }}, + }) + } + lineNum++ + continue + } + + if inPoetryDepsSection && (trimmed == "" || strings.HasPrefix(trimmed, "#")) { + lineNum++ + continue + } + + if inPoetryDepsSection { + line := trimmed + if idx := strings.Index(line, "#"); idx >= 0 { + line = strings.TrimSpace(line[:idx]) + } + + name, version, ok := parsePyprojectDepLine(line) + if !ok { + lineNum++ + continue + } + + valueStr := strings.TrimSpace(line[strings.Index(line, " = ")+3:]) + if strings.HasPrefix(valueStr, "{") && !strings.Contains(valueStr, "}") { + skipUntilCloseBrace = true + } + + resolvedVersion := resolveVersionWithLock(name, version, lockVersions) + + startIdx, endIdx := pyprojectLineIndices(raw, name) + packages = append(packages, models.Package{ + PackageManager: "pypi", + PackageName: name, + Version: resolvedVersion, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: lineNum, + StartIndex: startIdx, + EndIndex: endIdx, + }}, + }) + lineNum++ + continue + } + + if (inPep621Section) && (strings.Contains(trimmed, " = [") || strings.Contains(trimmed, "=[")) && !strings.HasPrefix(trimmed, "[") { + openIdx := strings.Index(trimmed, "[") + closeIdx := strings.LastIndex(trimmed, "]") + if openIdx >= 0 && closeIdx > openIdx { + arrayContent := trimmed[openIdx+1 : closeIdx] + parts := strings.Split(arrayContent, ",") + for _, part := range parts { + name, version, ok := parsePep621Requirement(part) + if ok { + resolvedVersion := resolveVersionWithLock(name, version, lockVersions) + startIdx, endIdx := pyprojectLineIndices(raw, name) + packages = append(packages, models.Package{ + PackageManager: "pypi", + PackageName: name, + Version: resolvedVersion, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: lineNum, + StartIndex: startIdx, + EndIndex: endIdx, + }}, + }) + } + } + } else if openIdx >= 0 { + inPep621Array = true + } + } + + lineNum++ + } + + if err := scanner.Err(); err != nil { + return nil, err + } + return packages, nil +} diff --git a/internal/parsers/poetry/poetry-pyproject-parser_test.go b/internal/parsers/poetry/poetry-pyproject-parser_test.go new file mode 100644 index 0000000..0cc24d7 --- /dev/null +++ b/internal/parsers/poetry/poetry-pyproject-parser_test.go @@ -0,0 +1,321 @@ +package poetry + +import ( + "os" + "path/filepath" + "testing" + + "github.com/Checkmarx/manifest-parser/internal/testdata" + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +func TestParsePyprojectExactVersion(t *testing.T) { + content := "[tool.poetry.dependencies]\nrequests = \"2.28.2\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + want := models.Package{ + PackageManager: "pypi", + PackageName: "requests", + Version: "2.28.2", + FilePath: filePath, + Locations: []models.Location{{ + Line: 1, + StartIndex: 0, + EndIndex: 19, + }}, + } + testdata.ValidatePackages(t, pkgs, []models.Package{want}) +} + +func TestParsePyprojectRangedVersion(t *testing.T) { + content := "[tool.poetry.dependencies]\nflask = \"^2.3.0\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "latest" { + t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version) + } +} + +func TestParsePyprojectSkipsPython(t *testing.T) { + content := "[tool.poetry.dependencies]\npython = \"^3.9\"\nrequests = \"2.28.2\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package (python skipped), got %d", len(pkgs)) + } + if pkgs[0].PackageName != "requests" { + t.Errorf("expected package %q, got %q", "requests", pkgs[0].PackageName) + } +} + +func TestParsePyprojectDevDependencies(t *testing.T) { + content := "[tool.poetry.dev-dependencies]\npytest = \"7.2.0\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + want := models.Package{ + PackageManager: "pypi", + PackageName: "pytest", + Version: "7.2.0", + FilePath: filePath, + Locations: []models.Location{{ + Line: 1, + StartIndex: 0, + EndIndex: 16, + }}, + } + testdata.ValidatePackages(t, pkgs, []models.Package{want}) +} + +func TestParsePyprojectGroupDependencies(t *testing.T) { + content := "[tool.poetry.group.lint.dependencies]\nblack = \"^22.0.0\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "black" { + t.Errorf("expected package %q, got %q", "black", pkgs[0].PackageName) + } + if pkgs[0].Version != "latest" { + t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version) + } +} + +func TestParsePyprojectGroupExactVersion(t *testing.T) { + content := "[tool.poetry.group.test.dependencies]\npytest = \"7.4.0\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "7.4.0" { + t.Errorf("expected version %q, got %q", "7.4.0", pkgs[0].Version) + } +} + +func TestParsePyprojectInlineTable(t *testing.T) { + content := "[tool.poetry.dependencies]\nnumpy = {version = \"1.24.3\", optional = true}\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "numpy" { + t.Errorf("expected package %q, got %q", "numpy", pkgs[0].PackageName) + } + if pkgs[0].Version != "1.24.3" { + t.Errorf("expected version %q, got %q", "1.24.3", pkgs[0].Version) + } +} + +func TestParsePyprojectWildcardVersion(t *testing.T) { + content := "[tool.poetry.dependencies]\nrequests = \"*\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "latest" { + t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version) + } +} + +func TestParsePyprojectPartialWildcard(t *testing.T) { + content := "[tool.poetry.dependencies]\nrequests = \"2.28.*\"\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "latest" { + t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version) + } +} + +func TestParsePyprojectInlineComment(t *testing.T) { + content := "[tool.poetry.dependencies]\nrequests = \"2.28.2\" # pinned for security\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "2.28.2" { + t.Errorf("expected version %q, got %q", "2.28.2", pkgs[0].Version) + } +} + +func TestParsePyprojectNoDepSection(t *testing.T) { + content := "[build-system]\nrequires = [\"poetry-core>=1.0.0\"]\n[tool.black]\nline-length = 88\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 0 { + t.Fatalf("expected 0 packages, got %d", len(pkgs)) + } +} + +func TestParsePyprojectPep621Dependencies(t *testing.T) { + content := "[project]\ndependencies = [\n \"requests>=2.28.0\",\n \"flask==2.3.0\",\n]\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "requests" { + t.Errorf("expected %q, got %q", "requests", pkgs[0].PackageName) + } + if pkgs[0].Version != "latest" { + t.Errorf("expected version %q (ranged), got %q", "latest", pkgs[0].Version) + } + if pkgs[1].PackageName != "flask" { + t.Errorf("expected %q, got %q", "flask", pkgs[1].PackageName) + } + if pkgs[1].Version != "2.3.0" { + t.Errorf("expected version %q, got %q", "2.3.0", pkgs[1].Version) + } +} + +func TestParsePyprojectPep621OptionalDeps(t *testing.T) { + content := "[project.optional-dependencies]\ndev = [\n \"pytest>=7.0\",\n \"black>=22.0\",\n]\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "pyproject.toml") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "pytest" { + t.Errorf("expected %q, got %q", "pytest", pkgs[0].PackageName) + } + if pkgs[1].PackageName != "black" { + t.Errorf("expected %q, got %q", "black", pkgs[1].PackageName) + } +} + +func TestParsePyprojectRealFile(t *testing.T) { + filePath := "../../testdata/pyproject.toml" + parser := &PoetryPyprojectParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(pkgs) != 7 { + t.Fatalf("expected 7 packages, got %d", len(pkgs)) + } + + packageNames := make([]string, 0, len(pkgs)) + for _, pkg := range pkgs { + packageNames = append(packageNames, pkg.PackageName) + } + + expectedNames := []string{ + "requests", "flask", "Pillow", "cryptography", "pytest", "numpy", "pandas", + } + + for i, expectedName := range expectedNames { + if i < len(packageNames) { + if packageNames[i] != expectedName { + t.Errorf("package %d: expected %q, got %q", i, expectedName, packageNames[i]) + } + } + } +} diff --git a/internal/parsers/pypi/pypi-parser.go b/internal/parsers/pypi/pypi-parser.go index f984480..99026f2 100644 --- a/internal/parsers/pypi/pypi-parser.go +++ b/internal/parsers/pypi/pypi-parser.go @@ -10,9 +10,114 @@ import ( "github.com/Checkmarx/manifest-parser/pkg/parser/models" ) -// PypiParser implements parsing of requirements.txt +// PypiParser implements parsing of requirements.txt and related Python dependency files. +// Supports formats generated by pip freeze, pip-compile, pip-tools, uv export, and Poetry export. type PypiParser struct{} +// logicalLine represents a single dependency entry that may span multiple physical lines +// when line continuations (\) are used. +type logicalLine struct { + content string // joined and hash-stripped content + firstLine int // 0-indexed line number of the first physical line + rawFirst string // raw text of the first physical line (for index computation) +} + +// pipOptionPrefixes lists prefixes of pip CLI option lines that should be skipped. +var pipOptionPrefixes = []string{ + "-i ", "--index-url", "--extra-index-url", + "-r ", "--requirement", + "-c ", "--constraint", + "-e ", "--editable", + "-f ", "--find-links", + "--no-binary", "--only-binary", + "--pre", "--trusted-host", + "--hash=", +} + +// isPipOptionLine returns true if the trimmed line is a pip CLI option rather than a package spec. +func isPipOptionLine(trimmed string) bool { + for _, prefix := range pipOptionPrefixes { + if strings.HasPrefix(trimmed, prefix) { + return true + } + } + return false +} + +// stripHashOptions removes --hash= tokens from a line. +func stripHashOptions(line string) string { + tokens := strings.Fields(line) + var filtered []string + for _, tok := range tokens { + if !strings.HasPrefix(tok, "--hash=") { + filtered = append(filtered, tok) + } + } + return strings.Join(filtered, " ") +} + +// preprocessLines joins physical lines connected by trailing backslashes into logical lines, +// and strips --hash= options from the result. +func preprocessLines(lines []string) []logicalLine { + var result []logicalLine + var accumulator []string + firstLine := -1 + rawFirst := "" + + for i, raw := range lines { + trimmed := strings.TrimSpace(raw) + + if firstLine == -1 { + firstLine = i + rawFirst = raw + } + + if strings.HasSuffix(trimmed, "\\") { + // Strip the trailing backslash and accumulate + trimmed = strings.TrimSuffix(trimmed, "\\") + trimmed = strings.TrimSpace(trimmed) + if trimmed != "" { + accumulator = append(accumulator, trimmed) + } + continue + } + + // Line does not end with \, so this completes the logical line + if trimmed != "" { + accumulator = append(accumulator, trimmed) + } + + joined := strings.Join(accumulator, " ") + joined = stripHashOptions(joined) + joined = strings.TrimSpace(joined) + + result = append(result, logicalLine{ + content: joined, + firstLine: firstLine, + rawFirst: rawFirst, + }) + + // Reset for next logical line + accumulator = nil + firstLine = -1 + rawFirst = "" + } + + // Handle any remaining accumulated content (file ended with \) + if len(accumulator) > 0 { + joined := strings.Join(accumulator, " ") + joined = stripHashOptions(joined) + joined = strings.TrimSpace(joined) + result = append(result, logicalLine{ + content: joined, + firstLine: firstLine, + rawFirst: rawFirst, + }) + } + + return result +} + func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFile string) (string, bool) { if match := re.FindStringSubmatch(line); match != nil { return match[1], true @@ -24,6 +129,13 @@ func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFil func extractVersion(line string) string { var version string switch { + case strings.Contains(line, "==="): + parts := strings.SplitN(line, "===", 2) + if len(parts) == 2 { + version = strings.TrimSpace(parts[1]) + } else { + version = "latest" + } case strings.Contains(line, "=="): parts := strings.SplitN(line, "==", 2) if len(parts) == 2 { @@ -40,6 +152,52 @@ func extractVersion(line string) string { return version } +// vcsSchemes lists VCS prefixes used in pip requirements. +var vcsSchemes = []string{"git+", "hg+", "svn+", "bzr+"} + +// isVCSRequirement returns true if the line is a VCS-based requirement. +func isVCSRequirement(line string) bool { + for _, scheme := range vcsSchemes { + if strings.HasPrefix(line, scheme) { + return true + } + } + return false +} + +// extractVCSPackageName extracts the package name from a VCS requirement line +// using the #egg= fragment. Returns empty string if not found. +func extractVCSPackageName(line string) string { + if idx := strings.Index(line, "#egg="); idx >= 0 { + egg := line[idx+5:] + // egg name may be followed by & or whitespace + if ampIdx := strings.IndexAny(egg, "& \t"); ampIdx >= 0 { + egg = egg[:ampIdx] + } + return strings.TrimSpace(egg) + } + return "" +} + +// isURLRequirement returns true if the line contains a PEP 508 URL requirement (pkg @ URL). +func isURLRequirement(line string) bool { + return strings.Contains(line, " @ ") +} + +// extractURLPackageName extracts the package name from a URL requirement (pkg @ https://...). +func extractURLPackageName(line string) string { + parts := strings.SplitN(line, " @ ", 2) + if len(parts) == 2 { + name := strings.TrimSpace(parts[0]) + // Strip extras like pkg[extra] → pkg + if bracketIdx := strings.Index(name, "["); bracketIdx >= 0 { + name = name[:bracketIdx] + } + return name + } + return "" +} + func computeIndices(raw, pkgName string) (int, int) { // Find the start index of the package name startIdx := strings.Index(raw, pkgName) @@ -74,19 +232,55 @@ func (p *PypiParser) Parse(manifestFile string) ([]models.Package, error) { } defer file.Close() - var packages []models.Package + // Read all lines into a slice + var lines []string scanner := bufio.NewScanner(file) - lineNum := 0 + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + if err := scanner.Err(); err != nil { + return nil, err + } + // Preprocess: join continuation lines and strip hash options + logicalLines := preprocessLines(lines) + + var packages []models.Package re := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>==4.2,<6.0\nmylib===1.0.dev5\n-r other-requirements.txt\n--index-url https://pypi.org/simple\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "requirements.txt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PypiParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 5 { + t.Fatalf("expected 5 packages, got %d", len(pkgs)) + } + + // flask==3.1.0 + if pkgs[0].PackageName != "flask" || pkgs[0].Version != "3.1.0" { + t.Errorf("pkg 0: got %q==%q, want flask==3.1.0", pkgs[0].PackageName, pkgs[0].Version) + } + // requests @ URL + if pkgs[1].PackageName != "requests" || pkgs[1].Version != "latest" { + t.Errorf("pkg 1: got %q==%q, want requests==latest", pkgs[1].PackageName, pkgs[1].Version) + } + // git+...#egg=custom-pkg + if pkgs[2].PackageName != "custom-pkg" || pkgs[2].Version != "latest" { + t.Errorf("pkg 2: got %q==%q, want custom-pkg==latest", pkgs[2].PackageName, pkgs[2].Version) + } + // django>=3.2,<4.0 + if pkgs[3].PackageName != "django" || pkgs[3].Version != "latest" { + t.Errorf("pkg 3: got %q==%q, want django==latest", pkgs[3].PackageName, pkgs[3].Version) + } + // mylib===1.0.dev5 + if pkgs[4].PackageName != "mylib" || pkgs[4].Version != "1.0.dev5" { + t.Errorf("pkg 4: got %q==%q, want mylib==1.0.dev5", pkgs[4].PackageName, pkgs[4].Version) + } +} diff --git a/internal/parsers/pypi/testdata/requirements-pip-compile.txt b/internal/parsers/pypi/testdata/requirements-pip-compile.txt new file mode 100644 index 0000000..4a12b22 --- /dev/null +++ b/internal/parsers/pypi/testdata/requirements-pip-compile.txt @@ -0,0 +1,14 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements.in +# +asgiref==3.8.1 + # via django +django==5.2.13 + # via -r requirements.in +sqlparse==0.5.5 + # via django +tzdata==2025.3 + # via django diff --git a/internal/parsers/pypi/testdata/requirements-pip-freeze.txt b/internal/parsers/pypi/testdata/requirements-pip-freeze.txt new file mode 100644 index 0000000..77210e9 --- /dev/null +++ b/internal/parsers/pypi/testdata/requirements-pip-freeze.txt @@ -0,0 +1,4 @@ +asgiref==3.8.1 +Django==5.2.13 +sqlparse==0.5.5 +tzdata==2025.3 diff --git a/internal/parsers/pypi/testdata/requirements-uv-export.txt b/internal/parsers/pypi/testdata/requirements-uv-export.txt new file mode 100644 index 0000000..281b691 --- /dev/null +++ b/internal/parsers/pypi/testdata/requirements-uv-export.txt @@ -0,0 +1,33 @@ +# This file was autogenerated by uv via the following command: +# uv export --no-dev --output-file requirements.txt +asgiref==3.8.1 \ + --hash=sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47 \ + --hash=sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590 + # via + # django + # sample-app +django==5.2.13 \ + --hash=sha256:a5cc92645b8eb50e38cdd2f9e6a12db171c61e3e6172a1a51b85e8ebc2291b42 \ + --hash=sha256:b5bb1d13cfe3b22e8a31d7a0bae2777a9c019a81d59ef4f72c8581f0d3e35f0e + # via sample-app +pycryptodome==3.21.0 \ + --hash=sha256:12ce0e6d32c4a63433cf26e9f5be9fd3a1c2cbe2bce1c3a834e3b5a43e8e82e0 \ + --hash=sha256:4d2cd4a5c4b939f2b5e2f8611a8b5c7f8c5a2de1f75c3e7c5e1c8f5a3c2b1e0a \ + --hash=sha256:7e3c5c2f1a4b8d9e0f1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a + # via sample-app +sqlparse==0.5.5 \ + --hash=sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca \ + --hash=sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946e4a7cf1a8b6e26cdc4b4 + # via + # django + # sample-app +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via + # asgiref + # sample-app +tzdata==2025.3 ; sys_platform == 'win32' \ + --hash=sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1 \ + --hash=sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7 + # via django diff --git a/internal/parsers/sbt/sbt-parser.go b/internal/parsers/sbt/sbt-parser.go new file mode 100644 index 0000000..954e729 --- /dev/null +++ b/internal/parsers/sbt/sbt-parser.go @@ -0,0 +1,252 @@ +package sbt + +import ( + "fmt" + "os" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +// SbtParser implements parsing of SBT .sbt files (build.sbt, plugins.sbt, etc.) +type SbtParser struct{} + +var ( + // varRegex matches Scala variable declarations: + // val name = "value" + // lazy val name = "value" + // def name = "value" + varRegex = regexp.MustCompile(`^\s*(?:lazy\s+)?(?:val|def)\s+(\w+)\s*=\s*"([^"]+)"`) + + // depRegex matches SBT dependency declarations: + // "groupId" % "artifactId" % "version" + // "groupId" %% "artifactId" % "version" + // "groupId" %%% "artifactId" % "version" + // "groupId" % "artifactId" % variableName + // With optional trailing scope: % "test" or % Test + depRegex = regexp.MustCompile(`"([^"]+)"\s+(%{1,3})\s+"([^"]+)"\s+%\s+(?:"([^"]+)"|(\w+))(?:\s+%\s+(?:"[^"]*"|\w+))?`) +) + +// extractVariables scans lines for val declarations and returns a variable map +func extractVariables(lines []string) map[string]string { + vars := make(map[string]string) + inBlockComment := false + + for _, rawLine := range lines { + line := stripComments(rawLine, &inBlockComment) + if inBlockComment { + continue + } + if match := varRegex.FindStringSubmatch(line); match != nil { + vars[match[1]] = match[2] + } + } + + return vars +} + +// resolveVersion resolves a version string using the variable map +func resolveVersion(version string, vars map[string]string) string { + if version == "" { + return "latest" + } + // Check for any range or wildcard patterns + if strings.ContainsAny(version, "[]()^~*><") || strings.Contains(version, "+") { + return "latest" + } + // If it looks like a literal version (starts with digit or contains dots/hyphens typical of versions), return as-is + if len(version) > 0 && (version[0] >= '0' && version[0] <= '9') { + return version + } + // Try to resolve as a variable + if resolved, exists := vars[version]; exists { + return resolved + } + return "latest" +} + +// stripComments removes comments from a line and tracks block comment state +func stripComments(line string, inBlockComment *bool) string { + if *inBlockComment { + if idx := strings.Index(line, "*/"); idx >= 0 { + *inBlockComment = false + line = line[idx+2:] + } else { + return "" + } + } + + // Handle inline block comments: /* ... */ on the same line + for { + startIdx := strings.Index(line, "/*") + if startIdx < 0 { + break + } + endIdx := strings.Index(line[startIdx+2:], "*/") + if endIdx >= 0 { + // Block comment opens and closes on same line + line = line[:startIdx] + line[startIdx+2+endIdx+2:] + } else { + // Block comment opens but doesn't close — entering block comment + *inBlockComment = true + line = line[:startIdx] + break + } + } + + // Handle single-line comments + if idx := strings.Index(line, "//"); idx >= 0 { + line = line[:idx] + } + + return line +} + +// modifierKeywords are SBT dependency modifiers that should be excluded from the location span. +// The EndIndex should cover only the core "g" % "a" % "v" declaration. +var modifierKeywords = []string{ + "exclude(", + "excludeAll(", + "intransitive()", + "withSources()", + "withJavadoc()", + "classifier ", + "classifier(", + "cross ", + "cross(", +} + +// computeLocationIndices calculates start and end indices for a dependency in a raw line. +// StartIndex = position of the first quote of the groupId. +// EndIndex = end of the core dependency declaration, excluding modifiers, comments, and trailing punctuation. +func computeLocationIndices(rawLine string, groupId string) (int, int) { + // StartIndex: position of the first quote of the groupId + searchStr := `"` + groupId + `"` + startIdx := strings.Index(rawLine, searchStr) + if startIdx < 0 { + startIdx = 0 + } + + // Start with the full line + endIdx := len(rawLine) + + // If there's a trailing comment, stop before it + if commentIdx := strings.Index(rawLine, "//"); commentIdx >= 0 && commentIdx < endIdx { + endIdx = commentIdx + } + + // Trim known dependency modifiers first (before punctuation removal, + // so keywords like "intransitive()" are still intact when searched) + endIdx = trimModifiers(rawLine, startIdx, endIdx) + + // Trim trailing whitespace, commas, and closing parentheses + endIdx = trimTrailingPunctuation(rawLine, endIdx) + + return startIdx, endIdx +} + +// trimTrailingPunctuation removes trailing whitespace, commas, and closing parens from the end boundary +func trimTrailingPunctuation(line string, endIdx int) int { + for endIdx > 0 { + ch := line[endIdx-1] + if ch == ' ' || ch == '\t' || ch == ',' || ch == ')' { + endIdx-- + } else { + break + } + } + return endIdx +} + +// trimModifiers scans the region [startIdx, endIdx) for modifier keywords and truncates endIdx +// to exclude them. Works backwards so nested modifiers are stripped in order. +func trimModifiers(line string, startIdx int, endIdx int) int { + region := line[startIdx:endIdx] + for _, kw := range modifierKeywords { + if idx := strings.Index(region, kw); idx >= 0 { + // Truncate at the modifier keyword + candidate := startIdx + idx + // Only trim if the modifier comes after the core dependency (at least "g" % "a" % "v") + if candidate > startIdx && candidate < endIdx { + endIdx = candidate + } + } + } + return endIdx +} + +// Parse implements the Parser interface for SBT build.sbt files +func (p *SbtParser) Parse(manifestFile string) ([]models.Package, error) { + content, err := os.ReadFile(manifestFile) + if err != nil { + return nil, fmt.Errorf("failed to read manifest file: %w", err) + } + + lines := strings.Split(strings.ReplaceAll(string(content), "\r\n", "\n"), "\n") + + // Pass 1: Extract variable definitions + vars := extractVariables(lines) + + // Pass 2: Extract dependencies + var packages []models.Package + seen := make(map[string]bool) + inBlockComment := false + + for lineNum, rawLine := range lines { + line := stripComments(rawLine, &inBlockComment) + if inBlockComment { + continue + } + + line = strings.TrimSpace(line) + if line == "" { + continue + } + + // Try to extract dependency from this line + match := depRegex.FindStringSubmatch(line) + if match == nil { + continue + } + + groupId := match[1] + // match[2] is the operator (%, %%, %%%) — captured but not used + artifactId := match[3] + quotedVersion := match[4] // version from quoted string + bareVersion := match[5] // version from variable name + + var version string + if quotedVersion != "" { + version = resolveVersion(quotedVersion, vars) + } else if bareVersion != "" { + version = resolveVersion(bareVersion, vars) + } else { + version = "latest" + } + + // Build package key for duplicate detection + pkgKey := groupId + ":" + artifactId + if seen[pkgKey] { + continue + } + seen[pkgKey] = true + + // Calculate location + startIdx, endIdx := computeLocationIndices(rawLine, groupId) + + packages = append(packages, models.Package{ + PackageManager: "sbt", + PackageName: pkgKey, + Version: version, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: lineNum, + StartIndex: startIdx, + EndIndex: endIdx, + }}, + }) + } + + return packages, nil +} diff --git a/internal/parsers/sbt/sbt-parser_test.go b/internal/parsers/sbt/sbt-parser_test.go new file mode 100644 index 0000000..f0d07ce --- /dev/null +++ b/internal/parsers/sbt/sbt-parser_test.go @@ -0,0 +1,892 @@ +package sbt + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/Checkmarx/manifest-parser/internal/testdata" + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +func TestParseSingleDependency(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "sbt", + PackageName: "org.example:test-lib", + Version: "1.0.0", + FilePath: filePath, + Locations: []models.Location{{ + Line: 0, + StartIndex: 23, + EndIndex: 59, + }}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestParseSingleDependencyDoublePercent(t *testing.T) { + content := `libraryDependencies += "org.typelevel" %% "cats-core" % "2.9.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "sbt", + PackageName: "org.typelevel:cats-core", + Version: "2.9.0", + FilePath: filePath, + Locations: []models.Location{{ + Line: 0, + StartIndex: 23, + EndIndex: 63, + }}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestParseSingleDependencyTriplePercent(t *testing.T) { + content := `libraryDependencies += "org.scala-js" %%% "scalajs-dom" % "2.4.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "sbt", + PackageName: "org.scala-js:scalajs-dom", + Version: "2.4.0", + FilePath: filePath, + Locations: []models.Location{{ + Line: 0, + StartIndex: 23, + EndIndex: 65, + }}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestParseSeqBlock(t *testing.T) { + content := `libraryDependencies ++= Seq( + "org.example" % "lib-a" % "1.0.0", + "org.example" % "lib-b" % "2.0.0" +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.example:lib-a" { + t.Errorf("expected pkg[0].PackageName = org.example:lib-a, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "1.0.0" { + t.Errorf("expected pkg[0].Version = 1.0.0, got %s", pkgs[0].Version) + } + if pkgs[1].PackageName != "org.example:lib-b" { + t.Errorf("expected pkg[1].PackageName = org.example:lib-b, got %s", pkgs[1].PackageName) + } + if pkgs[1].Version != "2.0.0" { + t.Errorf("expected pkg[1].Version = 2.0.0, got %s", pkgs[1].Version) + } +} + +func TestParseWithScope(t *testing.T) { + content := `libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.15" % "test" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.scalatest:scalatest" { + t.Errorf("expected PackageName = org.scalatest:scalatest, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "3.2.15" { + t.Errorf("expected Version = 3.2.15, got %s", pkgs[0].Version) + } +} + +func TestParseWithVariableVersion(t *testing.T) { + content := `val jacksonVersion = "2.13.0" +libraryDependencies += "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + if pkgs[0].Version != "2.13.0" { + t.Errorf("expected Version = 2.13.0, got %s", pkgs[0].Version) + } +} + +func TestParseWithUnresolvableVariable(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % unknownVar +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + if pkgs[0].Version != "latest" { + t.Errorf("expected Version = latest, got %s", pkgs[0].Version) + } +} + +func TestParseSingleLineComment(t *testing.T) { + content := `// "org.example" % "should-not-parse" % "1.0.0" +libraryDependencies += "org.example" % "real-lib" % "1.0.0" // inline comment +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.example:real-lib" { + t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName) + } +} + +func TestParseBlockComment(t *testing.T) { + content := `/* + "org.example" % "should-not-parse" % "1.0.0" +*/ +libraryDependencies += "org.example" % "real-lib" % "2.0.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.example:real-lib" { + t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "2.0.0" { + t.Errorf("expected Version = 2.0.0, got %s", pkgs[0].Version) + } +} + +func TestParseEmptyFile(t *testing.T) { + content := "" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 0 { + t.Fatalf("expected 0 packages, got %d", len(pkgs)) + } +} + +func TestParseDuplicateDependencies(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" +libraryDependencies += "org.example" % "test-lib" % "2.0.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package (duplicate skipped), got %d", len(pkgs)) + } + + if pkgs[0].Version != "1.0.0" { + t.Errorf("expected first occurrence version 1.0.0, got %s", pkgs[0].Version) + } +} + +func TestParseLocationAccuracy(t *testing.T) { + // Line: "org.example" % "test-lib" % "1.0.0" + // Positions: 0123456789... + content := `"org.example" % "test-lib" % "1.0.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "sbt", + PackageName: "org.example:test-lib", + Version: "1.0.0", + FilePath: filePath, + Locations: []models.Location{{ + Line: 0, + StartIndex: 0, + EndIndex: 36, + }}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestParseNonExistentFile(t *testing.T) { + parser := &SbtParser{} + _, err := parser.Parse("/nonexistent/build.sbt") + if err == nil { + t.Error("expected error for non-existent file, got none") + } +} + +func TestParseMixedOperators(t *testing.T) { + content := `libraryDependencies ++= Seq( + "org.example" % "lib-a" % "1.0.0", + "org.typelevel" %% "cats-core" % "2.9.0", + "org.scala-js" %%% "scalajs-dom" % "2.4.0" +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 3 { + t.Fatalf("expected 3 packages, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.example:lib-a" { + t.Errorf("expected pkg[0] = org.example:lib-a, got %s", pkgs[0].PackageName) + } + if pkgs[1].PackageName != "org.typelevel:cats-core" { + t.Errorf("expected pkg[1] = org.typelevel:cats-core, got %s", pkgs[1].PackageName) + } + if pkgs[2].PackageName != "org.scala-js:scalajs-dom" { + t.Errorf("expected pkg[2] = org.scala-js:scalajs-dom, got %s", pkgs[2].PackageName) + } +} + +func TestParseMalformedLine(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" +libraryDependencies += "org.example" % "real-lib" % "1.0.0" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package (malformed skipped), got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "org.example:real-lib" { + t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName) + } +} + +func TestResolveVersion(t *testing.T) { + vars := map[string]string{ + "jacksonVersion": "2.13.0", + "log4jVersion": "2.14.0", + } + + tests := []struct { + name string + version string + expected string + }{ + {"exact version", "1.2.3", "1.2.3"}, + {"variable lookup", "jacksonVersion", "2.13.0"}, + {"another variable", "log4jVersion", "2.14.0"}, + {"missing variable", "unknownVar", "latest"}, + {"empty version", "", "latest"}, + {"semver with pre-release", "2.0.0-RC1", "2.0.0-RC1"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := resolveVersion(tt.version, vars) + if result != tt.expected { + t.Errorf("resolveVersion(%q) = %q, want %q", tt.version, result, tt.expected) + } + }) + } +} + +func TestParseWithVersionRanges(t *testing.T) { + content := `libraryDependencies ++= Seq( + "org.springframework" % "spring-core" % "[1.0.0,2.0.0)", + "org.junit" % "junit" % "(1.0,2.0]" +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + if pkgs[0].Version != "latest" { + t.Errorf("expected version 'latest' for range, got %q", pkgs[0].Version) + } + if pkgs[1].Version != "latest" { + t.Errorf("expected version 'latest' for range, got %q", pkgs[1].Version) + } +} + +func TestParseWithPrefixWildcards(t *testing.T) { + content := `libraryDependencies ++= Seq( + "org.springframework" % "spring-core" % "1.0.+", + "org.junit" % "junit" % "4.12.*" +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + if pkgs[0].Version != "latest" { + t.Errorf("expected version 'latest' for wildcard, got %q", pkgs[0].Version) + } + if pkgs[1].Version != "latest" { + t.Errorf("expected version 'latest' for wildcard, got %q", pkgs[1].Version) + } +} + +func TestStripComments(t *testing.T) { + tests := []struct { + name string + line string + inBlockComment bool + expected string + expectedBlock bool + }{ + {"no comments", `"org.example" % "lib" % "1.0"`, false, `"org.example" % "lib" % "1.0"`, false}, + {"single line comment", `"org.example" % "lib" % "1.0" // comment`, false, `"org.example" % "lib" % "1.0" `, false}, + {"full line comment", `// this is a comment`, false, ``, false}, + {"block comment start", `/* start of block`, false, ``, true}, + {"inside block comment", ` some content inside block`, true, ``, true}, + {"block comment end", `end of block */`, true, ``, false}, + {"inline block comment", `before /* inside */ after`, false, `before after`, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + inBlock := tt.inBlockComment + result := stripComments(tt.line, &inBlock) + if result != tt.expected { + t.Errorf("stripComments(%q) = %q, want %q", tt.line, result, tt.expected) + } + if inBlock != tt.expectedBlock { + t.Errorf("inBlockComment = %v, want %v", inBlock, tt.expectedBlock) + } + }) + } +} + +func TestExtractVariables(t *testing.T) { + lines := []string{ + `val jacksonVersion = "2.13.0"`, + `lazy val log4jVersion = "2.14.0"`, + `def strutsVersion = "2.5.20"`, + `// val commentedOut = "1.0.0"`, + `name := "my-project"`, + `val emptyLine`, + ` val indentedVar = "3.0.0"`, + ` lazy val indentedLazy = "4.0.0"`, + ` def indentedDef = "5.0.0"`, + } + + vars := extractVariables(lines) + + expected := map[string]string{ + "jacksonVersion": "2.13.0", + "log4jVersion": "2.14.0", + "strutsVersion": "2.5.20", + "indentedVar": "3.0.0", + "indentedLazy": "4.0.0", + "indentedDef": "5.0.0", + } + + if len(vars) != len(expected) { + t.Fatalf("expected %d variables, got %d: %v", len(expected), len(vars), vars) + } + + for key, want := range expected { + got, exists := vars[key] + if !exists { + t.Errorf("expected variable %q not found", key) + continue + } + if got != want { + t.Errorf("variable %q = %q, want %q", key, got, want) + } + } +} + +func TestParseAddSbtPlugin(t *testing.T) { + content := `addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "plugins.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + if pkgs[0].PackageName != "com.eed3si9n:sbt-assembly" { + t.Errorf("expected pkg[0].PackageName = com.eed3si9n:sbt-assembly, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "2.1.0" { + t.Errorf("expected pkg[0].Version = 2.1.0, got %s", pkgs[0].Version) + } + if pkgs[1].PackageName != "org.scalameta:sbt-scalafmt" { + t.Errorf("expected pkg[1].PackageName = org.scalameta:sbt-scalafmt, got %s", pkgs[1].PackageName) + } + if pkgs[1].Version != "2.5.2" { + t.Errorf("expected pkg[1].Version = 2.5.2, got %s", pkgs[1].Version) + } +} + +func TestParseLazyVal(t *testing.T) { + content := `lazy val myVersion = "3.1.0" +libraryDependencies += "org.example" % "test-lib" % myVersion +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "3.1.0" { + t.Errorf("expected Version = 3.1.0, got %s", pkgs[0].Version) + } +} + +func TestParseDef(t *testing.T) { + content := `def myVersion = "4.2.0" +libraryDependencies += "org.example" % "test-lib" % myVersion +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].Version != "4.2.0" { + t.Errorf("expected Version = 4.2.0, got %s", pkgs[0].Version) + } +} + +func TestParseWithExclude(t *testing.T) { + content := `libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12") +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "org.apache.hadoop:hadoop-common" { + t.Errorf("expected PackageName = org.apache.hadoop:hadoop-common, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "3.3.4" { + t.Errorf("expected Version = 3.3.4, got %s", pkgs[0].Version) + } + // EndIndex should NOT include the exclude(...) modifier + loc := pkgs[0].Locations[0] + rawLine := `libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12")` + excludeStart := strings.Index(rawLine, " exclude(") + if loc.EndIndex > excludeStart { + t.Errorf("EndIndex %d extends into exclude(...) modifier (starts at %d)", loc.EndIndex, excludeStart) + } +} + +func TestParseWithIntransitive(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" intransitive() +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + // EndIndex should NOT include the intransitive() modifier + loc := pkgs[0].Locations[0] + rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" intransitive()` + modifierStart := strings.Index(rawLine, " intransitive()") + if loc.EndIndex > modifierStart { + t.Errorf("EndIndex %d extends into intransitive() modifier (starts at %d)", loc.EndIndex, modifierStart) + } +} + +func TestParseWithCross(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" cross CrossVersion.full +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + loc := pkgs[0].Locations[0] + rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" cross CrossVersion.full` + modifierStart := strings.Index(rawLine, " cross ") + if loc.EndIndex > modifierStart { + t.Errorf("EndIndex %d extends into cross modifier (starts at %d)", loc.EndIndex, modifierStart) + } +} + +func TestParseWithExcludeAll(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" excludeAll(ExclusionRule("org.slf4j")) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + loc := pkgs[0].Locations[0] + rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" excludeAll(ExclusionRule("org.slf4j"))` + modifierStart := strings.Index(rawLine, " excludeAll(") + if loc.EndIndex > modifierStart { + t.Errorf("EndIndex %d extends into excludeAll(...) modifier (starts at %d)", loc.EndIndex, modifierStart) + } +} + +func TestParseDependencyOverrides(t *testing.T) { + content := `dependencyOverrides += "com.google.guava" % "guava" % "32.1.2-jre" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "com.google.guava:guava" { + t.Errorf("expected PackageName = com.google.guava:guava, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "32.1.2-jre" { + t.Errorf("expected Version = 32.1.2-jre, got %s", pkgs[0].Version) + } +} + +func TestParseWithClassifier(t *testing.T) { + content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" % "test" classifier "tests" +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "build.sbt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SbtParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + if pkgs[0].PackageName != "org.example:test-lib" { + t.Errorf("expected PackageName = org.example:test-lib, got %s", pkgs[0].PackageName) + } + if pkgs[0].Version != "1.0.0" { + t.Errorf("expected Version = 1.0.0, got %s", pkgs[0].Version) + } + loc := pkgs[0].Locations[0] + rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" % "test" classifier "tests"` + modifierStart := strings.Index(rawLine, " classifier ") + if loc.EndIndex > modifierStart { + t.Errorf("EndIndex %d extends into classifier modifier (starts at %d)", loc.EndIndex, modifierStart) + } +} + +func TestSbtParser_Parse_RealFile(t *testing.T) { + parser := &SbtParser{} + manifestFile := "../../testdata/build.sbt" + packages, err := parser.Parse(manifestFile) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + // Verify package count: 10 deps (log4j, cats, jackson, struts, commons, snakeyaml, netty, scalajs, hadoop, guava) + if len(packages) != 10 { + t.Fatalf("expected 10 packages, got %d", len(packages)) + } + + // Validate key fields for each package + expected := []struct { + name string + version string + line int + }{ + {"org.apache.logging.log4j:log4j-core", "2.14.0", 10}, + {"org.typelevel:cats-core", "2.9.0", 13}, + {"com.fasterxml.jackson.core:jackson-databind", "2.13.0", 17}, + {"org.apache.struts:struts2-core", "2.5.20", 18}, + {"commons-collections:commons-collections", "3.2.1", 19}, + {"org.yaml:snakeyaml", "1.26", 20}, + {"io.netty:netty-codec-http", "4.1.68.Final", 21}, + {"org.scala-js:scalajs-dom", "2.4.0", 30}, + {"org.apache.hadoop:hadoop-common", "3.3.4", 33}, + {"com.google.guava:guava", "32.1.2-jre", 36}, + } + + for i, exp := range expected { + if packages[i].PackageManager != "sbt" { + t.Errorf("pkg[%d].PackageManager = %q, want %q", i, packages[i].PackageManager, "sbt") + } + if packages[i].PackageName != exp.name { + t.Errorf("pkg[%d].PackageName = %q, want %q", i, packages[i].PackageName, exp.name) + } + if packages[i].Version != exp.version { + t.Errorf("pkg[%d].Version = %q, want %q", i, packages[i].Version, exp.version) + } + if packages[i].FilePath != manifestFile { + t.Errorf("pkg[%d].FilePath = %q, want %q", i, packages[i].FilePath, manifestFile) + } + if len(packages[i].Locations) != 1 { + t.Errorf("pkg[%d] has %d locations, want 1", i, len(packages[i].Locations)) + continue + } + if packages[i].Locations[0].Line != exp.line { + t.Errorf("pkg[%d].Location.Line = %d, want %d", i, packages[i].Locations[0].Line, exp.line) + } + } + + // Verify hadoop exclude modifier is NOT included in EndIndex + hadoopPkg := packages[8] + if hadoopPkg.Locations[0].EndIndex > 71 { + t.Errorf("hadoop EndIndex %d should not extend into exclude(...) modifier", hadoopPkg.Locations[0].EndIndex) + } +} + +func TestSbtParser_Parse_PluginsFile(t *testing.T) { + parser := &SbtParser{} + manifestFile := "../../testdata/plugins.sbt" + packages, err := parser.Parse(manifestFile) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + expectedPackages := []models.Package{ + { + PackageManager: "sbt", + PackageName: "com.eed3si9n:sbt-assembly", + Version: "2.1.0", + FilePath: manifestFile, + }, + { + PackageManager: "sbt", + PackageName: "org.scalameta:sbt-scalafmt", + Version: "2.5.2", + FilePath: manifestFile, + }, + { + PackageManager: "sbt", + PackageName: "com.github.sbt:sbt-native-packager", + Version: "1.9.16", + FilePath: manifestFile, + }, + { + PackageManager: "sbt", + PackageName: "org.apache.log4j:log4j-core", + Version: "2.14.1", + FilePath: manifestFile, + }, + { + PackageManager: "sbt", + PackageName: "org.apache.commons:commons-compress", + Version: "1.20", + FilePath: manifestFile, + }, + { + PackageManager: "sbt", + PackageName: "commons-io:commons-io", + Version: "2.4", + FilePath: manifestFile, + }, + } + + if len(packages) != len(expectedPackages) { + t.Fatalf("expected %d packages, got %d", len(expectedPackages), len(packages)) + } + + for i, pkg := range packages { + if pkg.PackageManager != expectedPackages[i].PackageManager { + t.Errorf("pkg[%d].PackageManager = %q, want %q", i, pkg.PackageManager, expectedPackages[i].PackageManager) + } + if pkg.PackageName != expectedPackages[i].PackageName { + t.Errorf("pkg[%d].PackageName = %q, want %q", i, pkg.PackageName, expectedPackages[i].PackageName) + } + if pkg.Version != expectedPackages[i].Version { + t.Errorf("pkg[%d].Version = %q, want %q", i, pkg.Version, expectedPackages[i].Version) + } + } +} diff --git a/internal/parsers/setuptools/setup_cfg_parser.go b/internal/parsers/setuptools/setup_cfg_parser.go new file mode 100644 index 0000000..737d5a8 --- /dev/null +++ b/internal/parsers/setuptools/setup_cfg_parser.go @@ -0,0 +1,173 @@ +package setuptools + +import ( + "bufio" + "os" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +type SetupCfgParser struct{} + +func extractVersion(line string) string { + var version string + switch { + case strings.Contains(line, "=="): + parts := strings.SplitN(line, "==", 2) + if len(parts) == 2 { + version = strings.TrimSpace(parts[1]) + if strings.Contains(version, "*") { + version = "latest" + } + } else { + version = "latest" + } + default: + version = "latest" + } + return version +} + +func extractPackageName(line string, re *regexp.Regexp) (string, bool) { + if match := re.FindStringSubmatch(line); match != nil { + return match[1], true + } + return "", false +} + +func computeIndices(raw, pkgName string) (int, int) { + startIdx := strings.Index(raw, pkgName) + if startIdx < 0 { + startIdx = strings.IndexFunc(raw, func(r rune) bool { + return r != ' ' && r != '\t' + }) + } + + endIdx := len(raw) + if commentIdx := strings.Index(raw, "#"); commentIdx >= 0 { + endIdx = commentIdx + } + + endIdx = strings.LastIndexFunc(raw[:endIdx], func(r rune) bool { + return r != ' ' && r != '\t' + }) + 1 + + return startIdx, endIdx +} + +func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) { + file, err := os.Open(manifestFile) + if err != nil { + return nil, err + } + defer file.Close() + + var packages []models.Package + scanner := bufio.NewScanner(file) + lineNum := 0 + + var currentSection string + var currentKey string + re := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>==2.0\n flask==2.0.1\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages with mixed indentation, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_PackageNameWithNumbers(t *testing.T) { + // Package names can start with numbers + content := "[options]\ninstall_requires =\n py2exe\n 3to2\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages with numeric names, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_DuplicateSections(t *testing.T) { + // Same section defined twice - second one should override + content := "[options]\ninstall_requires =\n flask==1.0.0\n[options]\ninstall_requires =\n flask==2.0.0\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should find both (no dedup in parser) + if len(pkgs) < 1 { + t.Fatalf("expected at least 1 package with duplicate sections, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_VeryLongLine(t *testing.T) { + // Create a very long line with many dependencies + longDeps := "[options]\ninstall_requires =\n" + for i := 0; i < 100; i++ { + longDeps += " package" + string(rune(48+i%10)) + "\n" + } + + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(longDeps), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 100 { + t.Fatalf("expected 100 packages from long dependency list, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_UnicodePackageName(t *testing.T) { + // Package names with unicode (should be skipped as invalid) + content := "[options]\ninstall_requires =\n café-package\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Unicode shouldn't match the regex, so should be skipped + if len(pkgs) != 0 { + t.Fatalf("expected 0 packages with unicode name, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_VersionSpecifierEdgeCases(t *testing.T) { + content := "[options]\ninstall_requires =\n package1==1.0.0\n package2!=1.0.0\n package3~=1.0\n package4>1.0\n package5<2.0\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 5 { + t.Fatalf("expected 5 packages with various specifiers, got %d", len(pkgs)) + } + + versions := make(map[string]string) + for _, pkg := range pkgs { + versions[pkg.PackageName] = pkg.Version + } + + if versions["package1"] != "1.0.0" { + t.Errorf("package1: expected exact version, got %s", versions["package1"]) + } + if versions["package2"] != "latest" { + t.Errorf("package2 (!=): expected latest, got %s", versions["package2"]) + } + if versions["package3"] != "latest" { + t.Errorf("package3 (~=): expected latest, got %s", versions["package3"]) + } + if versions["package4"] != "latest" { + t.Errorf("package4 (>): expected latest, got %s", versions["package4"]) + } + if versions["package5"] != "latest" { + t.Errorf("package5 (<): expected latest, got %s", versions["package5"]) + } +} diff --git a/internal/parsers/setuptools/setup_cfg_parser_test.go b/internal/parsers/setuptools/setup_cfg_parser_test.go new file mode 100644 index 0000000..f69f8c8 --- /dev/null +++ b/internal/parsers/setuptools/setup_cfg_parser_test.go @@ -0,0 +1,261 @@ +package setuptools + +import ( + "os" + "path/filepath" + "testing" + + "github.com/Checkmarx/manifest-parser/internal/testdata" + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +func TestSetupCfgParser_ParseExactVersion(t *testing.T) { + content := "[options]\ninstall_requires =\n flask==2.0.1\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + want := models.Package{ + PackageManager: "pypi", + PackageName: "flask", + Version: "2.0.1", + FilePath: filePath, + Locations: []models.Location{{ + Line: 2, + StartIndex: 4, + EndIndex: 16, + }}, + } + testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want}) +} + +func TestSetupCfgParser_ParseRangeVersion(t *testing.T) { + content := "[options]\ninstall_requires =\n requests>=2.26.0\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + want := models.Package{ + PackageManager: "pypi", + PackageName: "requests", + Version: "latest", + FilePath: filePath, + Locations: []models.Location{{ + Line: 2, + StartIndex: 4, + EndIndex: 20, + }}, + } + testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want}) +} + +func TestSetupCfgParser_ParseMultipleDependencies(t *testing.T) { + content := "[options]\ninstall_requires =\n requests>=2.26.0\n flask==2.0.1\n six\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 3 { + t.Fatalf("expected 3 packages, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "pypi", + PackageName: "requests", + Version: "latest", + FilePath: filePath, + Locations: []models.Location{{Line: 2, StartIndex: 4, EndIndex: 20}}, + }, + { + PackageManager: "pypi", + PackageName: "flask", + Version: "2.0.1", + FilePath: filePath, + Locations: []models.Location{{Line: 3, StartIndex: 4, EndIndex: 16}}, + }, + { + PackageManager: "pypi", + PackageName: "six", + Version: "latest", + FilePath: filePath, + Locations: []models.Location{{Line: 4, StartIndex: 4, EndIndex: 7}}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestSetupCfgParser_ParseSetupRequires(t *testing.T) { + content := "[options]\nsetup_requires =\n setuptools>=42\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + want := models.Package{ + PackageManager: "pypi", + PackageName: "setuptools", + Version: "latest", + FilePath: filePath, + Locations: []models.Location{{Line: 2, StartIndex: 4, EndIndex: 18}}, + } + testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want}) +} + +func TestSetupCfgParser_ParseExtrasRequire(t *testing.T) { + content := "[options.extras_require]\ndev =\n pytest>=6.0\n black==22.3.0\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + expected := []models.Package{ + { + PackageManager: "pypi", + PackageName: "pytest", + Version: "latest", + FilePath: filePath, + Locations: []models.Location{{Line: 2, StartIndex: 4, EndIndex: 15}}, + }, + { + PackageManager: "pypi", + PackageName: "black", + Version: "22.3.0", + FilePath: filePath, + Locations: []models.Location{{Line: 3, StartIndex: 4, EndIndex: 17}}, + }, + } + testdata.ValidatePackages(t, pkgs, expected) +} + +func TestSetupCfgParser_ParseSkipCommentLine(t *testing.T) { + content := "[options]\ninstall_requires =\n # commented out\n flask==2.0.1\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + want := models.Package{ + PackageManager: "pypi", + PackageName: "flask", + Version: "2.0.1", + FilePath: filePath, + Locations: []models.Location{{Line: 3, StartIndex: 4, EndIndex: 16}}, + } + testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want}) +} + +func TestSetupCfgParser_ParseInlineComment(t *testing.T) { + content := "[options]\ninstall_requires =\n flask==2.0.1 # web framework\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + want := models.Package{ + PackageManager: "pypi", + PackageName: "flask", + Version: "2.0.1", + FilePath: filePath, + Locations: []models.Location{{Line: 2, StartIndex: 4, EndIndex: 16}}, + } + testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want}) +} + +func TestSetupCfgParser_ParseNoDependencies(t *testing.T) { + content := "[metadata]\nname = my-package\nversion = 1.0.0\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.cfg") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 0 { + t.Fatalf("expected 0 packages, got %d", len(pkgs)) + } +} + +func TestSetupCfgParser_Parse_RealFile(t *testing.T) { + filePath := "../../testdata/setup.cfg" + parser := &SetupCfgParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + expected := []models.Package{ + {PackageManager: "pypi", PackageName: "requests", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 6, StartIndex: 4, EndIndex: 20}}}, + {PackageManager: "pypi", PackageName: "flask", Version: "2.0.1", FilePath: filePath, Locations: []models.Location{{Line: 7, StartIndex: 4, EndIndex: 16}}}, + {PackageManager: "pypi", PackageName: "six", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 8, StartIndex: 4, EndIndex: 7}}}, + {PackageManager: "pypi", PackageName: "Pillow", Version: "9.0.0", FilePath: filePath, Locations: []models.Location{{Line: 9, StartIndex: 4, EndIndex: 17}}}, + {PackageManager: "pypi", PackageName: "cryptography", Version: "2.9.2", FilePath: filePath, Locations: []models.Location{{Line: 10, StartIndex: 4, EndIndex: 23}}}, + {PackageManager: "pypi", PackageName: "setuptools", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 13, StartIndex: 4, EndIndex: 18}}}, + {PackageManager: "pypi", PackageName: "pytest", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 17, StartIndex: 4, EndIndex: 15}}}, + {PackageManager: "pypi", PackageName: "black", Version: "22.3.0", FilePath: filePath, Locations: []models.Location{{Line: 18, StartIndex: 4, EndIndex: 17}}}, + } + + testdata.ValidatePackages(t, pkgs, expected) +} diff --git a/internal/parsers/setuptools/setup_py_parser.go b/internal/parsers/setuptools/setup_py_parser.go new file mode 100644 index 0000000..32ba437 --- /dev/null +++ b/internal/parsers/setuptools/setup_py_parser.go @@ -0,0 +1,330 @@ +package setuptools + +import ( + "os" + "regexp" + "strings" + + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +type SetupPyParser struct{} + +type depWithPosition struct { + name string + version string + lineNum int + startIndex int + endIndex int +} + +func extractVersionPy(line string) string { + var version string + switch { + case strings.Contains(line, "=="): + parts := strings.SplitN(line, "==", 2) + if len(parts) == 2 { + version = strings.TrimSpace(parts[1]) + if strings.Contains(version, "*") { + version = "latest" + } + } else { + version = "latest" + } + default: + version = "latest" + } + return version +} + +func extractPackageNamePy(line string, re *regexp.Regexp) (string, bool) { + if match := re.FindStringSubmatch(line); match != nil { + return match[1], true + } + return "", false +} + +// findPositionInFile finds the exact line number and column position of text in the file +func findPositionInFile(fullText string, depString string, searchStartPos int) (lineNum, startIndex, endIndex int) { + searchPos := strings.Index(fullText[searchStartPos:], depString) + if searchPos == -1 { + return 0, 0, 0 + } + + actualPos := searchStartPos + searchPos + + lineNum = 0 + colPos := 0 + for i := 0; i < actualPos && i < len(fullText); i++ { + if fullText[i] == '\n' { + lineNum++ + colPos = 0 + } else { + colPos++ + } + } + + startIndex = colPos + endIndex = colPos + len(depString) + + return lineNum, startIndex, endIndex +} + +// extractDepsFromListContent extracts dependencies from list/dict content and returns positions +func extractDepsFromListContent(content string, fullText string, searchStartPos int) []depWithPosition { + var deps []depWithPosition + + singleQuoteRe := regexp.MustCompile(`'([^']*)'`) + doubleQuoteRe := regexp.MustCompile(`"([^"]*)`) + + singleMatches := singleQuoteRe.FindAllStringSubmatchIndex(content, -1) + doubleMatches := doubleQuoteRe.FindAllStringSubmatchIndex(content, -1) + + type match struct { + dep string + startInContent int + endInContent int + } + var allMatches []match + + for _, m := range singleMatches { + if len(m) >= 4 { + dep := content[m[2]:m[3]] + startPos := m[0] + + endPos := m[1] + if endPos < len(content) { + afterQuote := endPos + for afterQuote < len(content) && (content[afterQuote] == ' ' || content[afterQuote] == '\t') { + afterQuote++ + } + if afterQuote < len(content) && content[afterQuote] == ':' { + continue + } + } + + allMatches = append(allMatches, match{ + dep: dep, + startInContent: startPos, + endInContent: endPos, + }) + } + } + + for _, m := range doubleMatches { + if len(m) >= 4 { + dep := content[m[2]:m[3]] + startPos := m[0] + + endPos := m[1] + if endPos < len(content) { + afterQuote := endPos + for afterQuote < len(content) && (content[afterQuote] == ' ' || content[afterQuote] == '\t') { + afterQuote++ + } + if afterQuote < len(content) && content[afterQuote] == ':' { + continue + } + } + + allMatches = append(allMatches, match{ + dep: dep, + startInContent: startPos, + endInContent: endPos, + }) + } + } + + pkgNameRe := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>== len(text) { + return "" + } + + openChar := text[startPos] + var closeChar byte + switch openChar { + case '[': + closeChar = ']' + case '(': + closeChar = ')' + case '{': + closeChar = '}' + default: + return "" + } + + depth := 0 + inString := false + stringChar := byte(0) + escaped := false + + for i := startPos; i < len(text); i++ { + ch := text[i] + + if escaped { + escaped = false + continue + } + + if ch == '\\' { + escaped = true + continue + } + + if (ch == '"' || ch == '\'') && !inString { + inString = true + stringChar = ch + continue + } + + if inString && ch == stringChar { + inString = false + continue + } + + if inString { + continue + } + + if ch == openChar { + depth++ + } else if ch == closeChar { + depth-- + if depth == 0 { + return text[startPos+1 : i] + } + } + } + + return "" +} + +// extractDependencies extracts dependencies from setup() call text for a specific key +func extractDependencies(setupText string, key string, fullText string, searchStartPos int) []depWithPosition { + var deps []depWithPosition + + keyPattern := key + "=" + keyIndex := strings.Index(setupText, keyPattern) + if keyIndex == -1 { + return deps + } + + startPos := keyIndex + len(keyPattern) + for startPos < len(setupText) && (setupText[startPos] == ' ' || setupText[startPos] == '\t') { + startPos++ + } + + if startPos >= len(setupText) { + return deps + } + + content := extractListContent(setupText, startPos) + if content == "" { + return deps + } + + deps = extractDepsFromListContent(content, fullText, searchStartPos) + return deps +} + +func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) { + data, err := os.ReadFile(manifestFile) + if err != nil { + return nil, err + } + + text := string(data) + var packages []models.Package + + setupStart := strings.Index(text, "setup(") + if setupStart == -1 { + setupStart = 0 + } else { + setupStart += len("setup") + } + + setupContent := extractListContent(text, setupStart) + if setupContent == "" && setupStart > 0 { + setupContent = text[setupStart:] + } + + for _, key := range []string{"install_requires", "setup_requires", "tests_require"} { + keyPosInText := strings.Index(text, key+"=") + deps := extractDependencies(setupContent, key, text, keyPosInText) + for _, dep := range deps { + packages = append(packages, models.Package{ + PackageManager: "pypi", + PackageName: dep.name, + Version: dep.version, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: dep.lineNum, + StartIndex: dep.startIndex, + EndIndex: dep.endIndex, + }}, + }) + } + } + + extrasStart := strings.Index(setupContent, "extras_require") + if extrasStart != -1 { + eqIndex := strings.Index(setupContent[extrasStart:], "=") + if eqIndex != -1 { + dictStartPos := extrasStart + eqIndex + 1 + for dictStartPos < len(setupContent) && (setupContent[dictStartPos] == ' ' || setupContent[dictStartPos] == '\t' || setupContent[dictStartPos] == '\n') { + dictStartPos++ + } + if dictStartPos < len(setupContent) { + dictContent := extractListContent(setupContent, dictStartPos) + if dictContent != "" { + extrasStartInText := strings.Index(text, "extras_require") + deps := extractDepsFromListContent(dictContent, text, extrasStartInText) + for _, dep := range deps { + packages = append(packages, models.Package{ + PackageManager: "pypi", + PackageName: dep.name, + Version: dep.version, + FilePath: manifestFile, + Locations: []models.Location{{ + Line: dep.lineNum, + StartIndex: dep.startIndex, + EndIndex: dep.endIndex, + }}, + }) + } + } + } + } + } + return packages, nil +} diff --git a/internal/parsers/setuptools/setup_py_parser_test.go b/internal/parsers/setuptools/setup_py_parser_test.go new file mode 100644 index 0000000..dc1e959 --- /dev/null +++ b/internal/parsers/setuptools/setup_py_parser_test.go @@ -0,0 +1,226 @@ +package setuptools + +import ( + "os" + "path/filepath" + "testing" + + "github.com/Checkmarx/manifest-parser/internal/testdata" + "github.com/Checkmarx/manifest-parser/pkg/parser/models" +) + +func TestSetupPyParser_ParseSingleInstallRequire(t *testing.T) { + content := "from setuptools import setup\n\nsetup(\n install_requires=['flask==2.0.1'],\n)\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + if got.PackageManager != "pypi" || got.PackageName != "flask" || got.Version != "2.0.1" { + t.Errorf("got package %s %s, want flask 2.0.1", got.PackageName, got.Version) + } +} + +func TestSetupPyParser_ParseRangeInstallRequire(t *testing.T) { + content := "from setuptools import setup\n\nsetup(\n install_requires=['requests>=2.26.0'],\n)\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + if got.PackageManager != "pypi" || got.PackageName != "requests" || got.Version != "latest" { + t.Errorf("got package %s %s, want requests latest", got.PackageName, got.Version) + } +} + +func TestSetupPyParser_ParseMultipleDependencies(t *testing.T) { + content := `from setuptools import setup + +setup( + install_requires=[ + 'requests>=2.26.0', + 'flask==2.0.1', + 'six', + ], +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 3 { + t.Fatalf("expected 3 packages, got %d", len(pkgs)) + } + + packageNames := make(map[string]string) + for _, pkg := range pkgs { + packageNames[pkg.PackageName] = pkg.Version + } + + if packageNames["requests"] != "latest" { + t.Errorf("requests: expected latest, got %s", packageNames["requests"]) + } + if packageNames["flask"] != "2.0.1" { + t.Errorf("flask: expected 2.0.1, got %s", packageNames["flask"]) + } + if packageNames["six"] != "latest" { + t.Errorf("six: expected latest, got %s", packageNames["six"]) + } +} + +func TestSetupPyParser_ParseExtrasRequire(t *testing.T) { + content := `from setuptools import setup + +setup( + install_requires=['requests>=2.26.0'], + extras_require={ + 'dev': ['pytest>=6.0', 'black==22.3.0'], + }, +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) < 3 { + t.Fatalf("expected at least 3 packages, got %d", len(pkgs)) + } + + packageNames := make(map[string]string) + for _, pkg := range pkgs { + packageNames[pkg.PackageName] = pkg.Version + } + + if packageNames["requests"] != "latest" { + t.Errorf("requests: expected latest, got %s", packageNames["requests"]) + } + if packageNames["pytest"] != "latest" { + t.Errorf("pytest: expected latest, got %s", packageNames["pytest"]) + } + if packageNames["black"] != "22.3.0" { + t.Errorf("black: expected 22.3.0, got %s", packageNames["black"]) + } +} + +func TestSetupPyParser_ParseTestsRequire(t *testing.T) { + content := "from setuptools import setup\n\nsetup(\n tests_require=['pytest'],\n)\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 1 { + t.Fatalf("expected 1 package, got %d", len(pkgs)) + } + + got := pkgs[0] + if got.PackageManager != "pypi" || got.PackageName != "pytest" { + t.Errorf("got package %s, want pytest", got.PackageName) + } +} + +func TestSetupPyParser_ParseNoRequires(t *testing.T) { + content := "from setuptools import setup\n\nsetup(\n name='my-package',\n)\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 0 { + t.Fatalf("expected 0 packages, got %d", len(pkgs)) + } +} + +func TestSetupPyParser_ParseWithDoubleQuotes(t *testing.T) { + content := `from setuptools import setup + +setup( + install_requires=[ + "requests>=2.26.0", + "flask==2.0.1", + ], +) +` + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "setup.py") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 2 { + t.Fatalf("expected 2 packages, got %d", len(pkgs)) + } + + packageNames := make(map[string]string) + for _, pkg := range pkgs { + packageNames[pkg.PackageName] = pkg.Version + } + + if packageNames["requests"] != "latest" { + t.Errorf("requests: expected latest, got %s", packageNames["requests"]) + } + if packageNames["flask"] != "2.0.1" { + t.Errorf("flask: expected 2.0.1, got %s", packageNames["flask"]) + } +} + +func TestSetupPyParser_Parse_RealFile(t *testing.T) { + filePath := "../../testdata/setup.py" + parser := &SetupPyParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + expected := []models.Package{ + {PackageManager: "pypi", PackageName: "requests", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 6, StartIndex: 9, EndIndex: 25}}}, + {PackageManager: "pypi", PackageName: "flask", Version: "2.0.1", FilePath: filePath, Locations: []models.Location{{Line: 7, StartIndex: 9, EndIndex: 21}}}, + {PackageManager: "pypi", PackageName: "six", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 8, StartIndex: 9, EndIndex: 12}}}, + {PackageManager: "pypi", PackageName: "Pillow", Version: "9.0.0", FilePath: filePath, Locations: []models.Location{{Line: 9, StartIndex: 9, EndIndex: 22}}}, + {PackageManager: "pypi", PackageName: "cryptography", Version: "2.9.2", FilePath: filePath, Locations: []models.Location{{Line: 10, StartIndex: 9, EndIndex: 28}}}, + {PackageManager: "pypi", PackageName: "pytest", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 19, StartIndex: 9, EndIndex: 15}}}, + {PackageManager: "pypi", PackageName: "pytest", Version: "latest", FilePath: filePath, Locations: []models.Location{{Line: 14, StartIndex: 13, EndIndex: 24}}}, + {PackageManager: "pypi", PackageName: "black", Version: "22.3.0", FilePath: filePath, Locations: []models.Location{{Line: 15, StartIndex: 13, EndIndex: 26}}}, + } + + testdata.ValidatePackages(t, pkgs, expected) +} diff --git a/internal/testdata/build.sbt b/internal/testdata/build.sbt new file mode 100644 index 0000000..d8b0a94 --- /dev/null +++ b/internal/testdata/build.sbt @@ -0,0 +1,37 @@ +// Project settings +name := "vulnerable-test-project" +version := "1.0.0" +scalaVersion := "2.13.12" + +val jacksonVersion = "2.13.0" +lazy val log4jVersion = "2.14.0" +def strutsVersion = "2.5.20" + +// Single dependency with % — CVE-2021-44228 (Log4Shell) +libraryDependencies += "org.apache.logging.log4j" % "log4j-core" % log4jVersion + +// Single dependency with %% — safe dependency +libraryDependencies += "org.typelevel" %% "cats-core" % "2.9.0" + +// Seq block with mixed operators and vulnerable packages +libraryDependencies ++= Seq( + "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion, + "org.apache.struts" % "struts2-core" % strutsVersion, + "commons-collections" % "commons-collections" % "3.2.1", + "org.yaml" % "snakeyaml" % "1.26", + "io.netty" %% "netty-codec-http" % "4.1.68.Final" % "test" +) + +/* + This is a block comment — dependencies here should NOT be parsed + "org.example" % "should-not-parse" % "1.0.0" +*/ + +// Scala.js dependency with %%% +libraryDependencies += "org.scala-js" %%% "scalajs-dom" % "2.4.0" + +// Dependency with exclude modifier +libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12") + +// Dependency override +dependencyOverrides += "com.google.guava" % "guava" % "32.1.2-jre" diff --git a/internal/testdata/plugins.sbt b/internal/testdata/plugins.sbt new file mode 100644 index 0000000..87abec4 --- /dev/null +++ b/internal/testdata/plugins.sbt @@ -0,0 +1,9 @@ +// SBT plugins +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") +addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.16") + +// Vulnerable dependencies for testing (intentional - to verify IDE decorations) +addSbtPlugin("org.apache.log4j" % "log4j-core" % "2.14.1") +addSbtPlugin("org.apache.commons" % "commons-compress" % "1.20") +addSbtPlugin("commons-io" % "commons-io" % "2.4") diff --git a/internal/testdata/pyproject.toml b/internal/testdata/pyproject.toml new file mode 100644 index 0000000..6f74403 --- /dev/null +++ b/internal/testdata/pyproject.toml @@ -0,0 +1,19 @@ +[tool.poetry] +name = "test-app" +version = "1.0.0" + +[tool.poetry.dependencies] +python = "^3.9" +requests = "2.28.2" +flask = "^2.3.0" +Pillow = "9.0.0" +cryptography = "2.9.2" + +[tool.poetry.dev-dependencies] +pytest = "^7.4.0" + +[project] +dependencies = [ + "numpy>=1.20.0", + "pandas==2.0.0", +] diff --git a/internal/testdata/setup.cfg b/internal/testdata/setup.cfg new file mode 100644 index 0000000..dc28049 --- /dev/null +++ b/internal/testdata/setup.cfg @@ -0,0 +1,19 @@ +[metadata] +name = my-package +version = 0.1.0 + +[options] +install_requires = + requests>=2.26.0 + flask==2.0.1 + six + Pillow==9.0.0 + cryptography==2.9.2 + +setup_requires = + setuptools>=42 + +[options.extras_require] +dev = + pytest>=6.0 + black==22.3.0 diff --git a/internal/testdata/setup.py b/internal/testdata/setup.py new file mode 100644 index 0000000..6fa77a1 --- /dev/null +++ b/internal/testdata/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages + +setup( + name='my-package', + version='0.1.0', + install_requires=[ + 'requests>=2.26.0', + 'flask==2.0.1', + 'six', + 'Pillow==9.0.0', + 'cryptography==2.9.2', + ], + extras_require={ + 'dev': [ + 'pytest>=6.0', + 'black==22.3.0', + ], + }, + tests_require=[ + 'pytest', + ], +) diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go index 2710f99..668169d 100644 --- a/pkg/parser/manifest-file-selector.go +++ b/pkg/parser/manifest-file-selector.go @@ -15,6 +15,12 @@ const ( DotnetPackagesConfig MavenPom GoMod + GradleBuild + GradleVersionCatalog + SbtBuild + SetuptoolsSetupCfg + SetuptoolsSetupPy + PoetryPyproject ) // selectManifestFile a method to select a manifest file type by its name @@ -28,13 +34,18 @@ func selectManifestFile(manifest string) Manifest { } if manifestFileExtension == ".txt" { - //check if file name starts with "requirement" or "packages" + // check if file name starts with "requirement", "packages", or "constraint" if strings.HasPrefix(manifestFileName, "requirement") || - strings.HasPrefix(manifestFileName, "packages") { + strings.HasPrefix(manifestFileName, "packages") || + strings.HasPrefix(manifestFileName, "constraint") { return PypiRequirements } } + if manifestFileExtension == ".sbt" { + return SbtBuild + } + if manifestFileName == "pom.xml" { return MavenPom } @@ -55,5 +66,25 @@ func selectManifestFile(manifest string) Manifest { return GoMod } + if manifestFileName == "build.gradle" || manifestFileName == "build.gradle.kts" { + return GradleBuild + } + + if manifestFileName == "libs.versions.toml" { + return GradleVersionCatalog + } + + if manifestFileName == "setup.cfg" { + return SetuptoolsSetupCfg + } + + if manifestFileName == "setup.py" { + return SetuptoolsSetupPy + } + + if manifestFileName == "pyproject.toml" { + return PoetryPyproject + } + return -1 } diff --git a/pkg/parser/manifest-file-selector_test.go b/pkg/parser/manifest-file-selector_test.go index 8d4d91c..222a59d 100644 --- a/pkg/parser/manifest-file-selector_test.go +++ b/pkg/parser/manifest-file-selector_test.go @@ -66,3 +66,129 @@ func TestManifestFileSelector_ExpectGoMod(t *testing.T) { t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) } } + +func TestManifestFileSelector_ExpectSbtBuild(t *testing.T) { + manifest := "build.sbt" + got := selectManifestFile(manifest) + want := SbtBuild + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectSbtPlugins(t *testing.T) { + manifest := "plugins.sbt" + got := selectManifestFile(manifest) + want := SbtBuild + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectSbtCustom(t *testing.T) { + manifest := "dependencies.sbt" + got := selectManifestFile(manifest) + want := SbtBuild + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementsTxt(t *testing.T) { + manifest := "requirements.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementsDev(t *testing.T) { + manifest := "requirements-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementSingular(t *testing.T) { + manifest := "requirement.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementSingularDev(t *testing.T) { + manifest := "requirement-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementsWithPath(t *testing.T) { + manifest := "/some/path/to/requirements-prod.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraints(t *testing.T) { + manifest := "constraints.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraintsDev(t *testing.T) { + manifest := "constraints-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraintsWithPath(t *testing.T) { + manifest := "/some/path/to/constraints-prod.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectSetuptoolsSetupCfg(t *testing.T) { + manifest := "setup.cfg" + got := selectManifestFile(manifest) + want := SetuptoolsSetupCfg + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectSetuptoolsSetupPy(t *testing.T) { + manifest := "setup.py" + got := selectManifestFile(manifest) + want := SetuptoolsSetupPy + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPoetryPyproject(t *testing.T) { + manifest := "pyproject.toml" + got := selectManifestFile(manifest) + want := PoetryPyproject + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} diff --git a/pkg/parser/parser_factory.go b/pkg/parser/parser_factory.go index 0f81e86..03db997 100644 --- a/pkg/parser/parser_factory.go +++ b/pkg/parser/parser_factory.go @@ -3,9 +3,13 @@ package parser import ( "github.com/Checkmarx/manifest-parser/internal/parsers/dotnet" "github.com/Checkmarx/manifest-parser/internal/parsers/golang" + "github.com/Checkmarx/manifest-parser/internal/parsers/gradle" "github.com/Checkmarx/manifest-parser/internal/parsers/maven" "github.com/Checkmarx/manifest-parser/internal/parsers/npm" + "github.com/Checkmarx/manifest-parser/internal/parsers/poetry" "github.com/Checkmarx/manifest-parser/internal/parsers/pypi" + "github.com/Checkmarx/manifest-parser/internal/parsers/sbt" + "github.com/Checkmarx/manifest-parser/internal/parsers/setuptools" ) func ParsersFactory(manifest string) Parser { @@ -26,6 +30,18 @@ func ParsersFactory(manifest string) Parser { return &dotnet.DotnetPackagesConfigParser{} case GoMod: return &golang.GoModParser{} + case GradleBuild: + return &gradle.GradleParser{} + case GradleVersionCatalog: + return &gradle.VersionCatalogParser{} + case SbtBuild: + return &sbt.SbtParser{} + case SetuptoolsSetupCfg: + return &setuptools.SetupCfgParser{} + case SetuptoolsSetupPy: + return &setuptools.SetupPyParser{} + case PoetryPyproject: + return &poetry.PoetryPyprojectParser{} default: return nil } diff --git a/test/resources/GRADLE_TEST_FILES_README.md b/test/resources/GRADLE_TEST_FILES_README.md new file mode 100644 index 0000000..26e7bd2 --- /dev/null +++ b/test/resources/GRADLE_TEST_FILES_README.md @@ -0,0 +1,308 @@ +# Enterprise-Grade Gradle Parser Test Files + +This directory contains comprehensive test fixtures demonstrating production-grade Gradle configurations with real vulnerability examples. + +## Files Overview + +### 1. `build.gradle` (3.1 KB) +**Groovy DSL Format** - Original multi-module project configuration + +**Features Demonstrated:** +- ✅ Groovy syntax dependency declarations +- ✅ `subprojects` block for shared configuration +- ✅ Module-specific `project(':name')` blocks +- ✅ Extended `ext` blocks for version management +- ✅ Comments and security annotations +- ✅ Jacoco, Checkstyle, and SpringBoot plugins + +**Dependencies Parsed:** 15 packages with full version info + +**Vulnerabilities Included:** +- 🔴 **CRITICAL:** Log4Shell (log4j-core:2.14.0) +- 🔴 **CRITICAL:** Commons Collections RCE (commons-collections:3.2.1) +- 🔥 **HIGH:** Spring Framework XXE (spring-web:5.2.0.RELEASE) +- 🔥 **HIGH:** Jackson RCE (jackson-databind:2.9.8) +- 🔥 **HIGH:** Hibernate SQL Injection (hibernate-core:5.4.0.Final) + +--- + +### 2. `build.gradle.kts` (13.5 KB) +**Kotlin DSL Format** - Advanced multi-module enterprise configuration + +**Features Demonstrated:** +- ✅ Kotlin DSL syntax `implementation("...")` +- ✅ Kotlin `val` variable declarations with type inference +- ✅ `dependencyManagement` with BOM imports +- ✅ `platform()` wrapper for dependency BOMs +- ✅ Extended dependency configurations: `debugImplementation`, `releaseImplementation`, `ksp`, `compileOnlyApi` +- ✅ `configure()` scoped configuration for select modules +- ✅ Custom tasks and build info +- ✅ SonarQube integration + +**Module Breakdown:** +- **`:core-api`** - Shared business logic (Spring Boot + Hibernate) +- **`:security-module`** - Authentication/Authorization (Spring Security + JWT) +- **`:data-module`** - Database layer (JPA + Hibernate + Liquibase) +- **`:api-gateway`** - External integrations (Spring Cloud Gateway) +- **`:monitoring-module`** - Observability (Actuator + Micrometer + Prometheus) + +**Dependencies Parsed:** 40+ packages including BOM references + +**Extended Configurations:** +- `debugImplementation` - Facebook Stetho for Android debugging +- `releaseImplementation` - Firebase Crashlytics & Analytics +- `ksp` - Dagger compiler for dependency injection code generation +- `annotationProcessor` - Lombok for boilerplate generation +- `testImplementation` - JUnit, Mockito, AssertJ + +**Vulnerabilities Included:** +- 🔴 **CRITICAL:** Log4j RCE (2.14.0, 2.17.1) +- 🔴 **CRITICAL:** Commons Collections (3.2.1, 3.2.2) +- 🔥 **HIGH:** Spring Core RCE (5.2.0.RELEASE) +- 🔥 **HIGH:** Spring Security XXE (5.4.0, 5.7.1) +- 🔥 **HIGH:** Jackson Databind (2.9.8, 2.13.3) +- 🔥 **HIGH:** XStream Deserialization (1.4.17) +- 🔥 **HIGH:** Hibernate SQLi (5.4.0.Final, 5.6.10.Final) +- ⚠️ **MEDIUM:** HttpClient DoS (4.5.5, 4.5.13) +- ⚠️ **MEDIUM:** Guava Overflow (23.0, 31.1-jre) +- ⚠️ **MEDIUM:** Logback (1.2.3, 1.2.11) +- ⚠️ **MEDIUM:** Tomcat Ghostcat (9.0.10) +- 🟡 **LOW:** Commons Codec (1.14, 1.15) +- 🟡 **LOW:** Jetty Path Traversal (9.4.38) +- 🟡 **LOW:** MySQL Legacy (5.1.40) + +--- + +### 3. `gradle.properties` (2.0 KB) +**Centralized Configuration** - Shared across all modules + +**Sections:** +1. **Organization Settings** - Parallel builds, caching, daemon configuration +2. **Java Version** - Version 11 target with toolchain config +3. **Framework Versions** - Spring, Hibernate, Jackson versions +4. **Logging Versions** - Log4j, SLF4J, Logback versions +5. **Apache Commons** - Commons Lang3, Codec, Collections, HttpClient +6. **Database Drivers** - MySQL, PostgreSQL, H2 versions +7. **JSON/XML Processing** - Guava, Gson, XStream versions +8. **Testing Frameworks** - JUnit, Mockito, AssertJ, TestNG versions +9. **Build & Quality Tools** - JaCoCo, Checkstyle, SpotBugs, SonarQube versions +10. **Google Cloud** - BOM version for GCP integration + +**Features Demonstrated:** +- ✅ Property name conventions (camelCase with Version suffix) +- ✅ Comments and section organization +- ✅ Version pinning for reproducible builds +- ✅ Easy centralized updates across modules +- ✅ Used by both `build.gradle` and `build.gradle.kts` files + +**Example Usage:** +```gradle +// In build.gradle +implementation "org.springframework:spring-core:${springVersion}" + +// In build.gradle.kts +implementation("org.springframework:spring-core:${property("springVersion")}") +``` + +--- + +### 4. `gradle/libs.versions.toml` (9.7 KB) +**Version Catalog** - Modern dependency management (Gradle 7.0+) + +**Format:** TOML with three sections: +1. **`[versions]`** - Centralized version definitions +2. **`[libraries]`** - Library references with version links +3. **`[bundles]`** - Grouped dependencies for common use cases + +**Features Demonstrated:** + +#### Version References +```toml +[versions] +spring-version = "5.3.20" +spring-boot-version = "2.7.0" + +[libraries] +spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" } +spring-boot-web = { module = "org.springframework.boot:spring-boot-starter-web", version.ref = "spring-boot-version" } +``` + +#### Simple Inline Format +```toml +[libraries] +guava = "com.google.guava:guava:31.1-jre" +``` + +#### Key-Value Map Format +```toml +[libraries] +hibernate = { module = "org.hibernate:hibernate-core", version.ref = "hibernate-version" } +h2 = { module = "com.h2database:h2", version.ref = "h2-version" } +``` + +#### Bundles (Grouped Dependencies) +```toml +[bundles] +spring-boot-web = [ + "spring-boot-starter-web", + "spring-boot-starter-validation", + "spring-boot-starter-logging" +] +``` + +**Usage in build.gradle.kts:** +```kotlin +dependencies { + implementation(libs.spring.core) + implementation(libs.spring.boot.web) + testImplementation(libs.bundles.testing) +} +``` + +**80+ Dependencies Catalogued:** +- Spring Framework (13 entries) +- Spring Boot Starters (6 entries) +- Spring Cloud (2 entries) +- Logging (4 entries) +- Database/ORM (7 entries) +- JSON/XML (5 entries) +- Apache Commons (4 entries) +- Testing (3 entries) +- Android/Debug (2 entries) +- API Documentation (2 entries) +- Kotlin/Coroutines (3 entries) + +**Vulnerabilities in Catalog:** +All known CVE versions are explicitly catalogued with comments marking severity: +- `log4j-core` - CVE-2021-44228 (Log4Shell RCE) +- `commons-collections` - CVE-2015-4852 (Deserialization) +- `jackson-databind` - CVE-2020-5410 (Polymorphic RCE) +- `xstream` - CVE-2019-12384 (XXE) +- `httpclient` - CVE-2019-9740 (DoS) + +--- + +## Parser Capabilities Tested + +### Feature Coverage + +| Feature | Status | Example | +|---------|--------|---------| +| Groovy DSL | ✅ | `implementation 'group:artifact:version'` | +| Kotlin DSL | ✅ | `implementation("group:artifact:version")` | +| gradle.properties | ✅ | `implementation "org:lib:${springVersion}"` | +| Version Catalog | ✅ | `implementation(libs.spring.core)` | +| Platform/BOM | ✅ | `implementation(platform('...'))` | +| Extended Configs | ✅ | `debugImplementation`, `ksp`, `releaseImplementation` | +| Multi-line Deps | ✅ | Dependencies spanning multiple lines | +| Conditional Deps | ✅ | Dependencies inside `if` blocks | +| Project References | ✅ (Skipped) | `implementation project(':core')` | +| File References | ✅ (Skipped) | `implementation files('libs/*.jar')` | +| BOM Imports | ✅ | `dependencyManagement.imports.mavenBom(...)` | +| Variable Resolution | ✅ | `${propertyName}` and `$varName` | +| Commented Code | ✅ | Properly ignores commented declarations | + +### Vulnerability Detection + +The test files contain **31 vulnerable dependencies** across severity levels: + +``` +🔴 CRITICAL: 7 packages (Log4j, Commons Collections, Spring, Jackson, XStream) +🔥 HIGH: 8 packages (Spring Security, HttpClient, Hibernate, Guava, Logback) +⚠️ MEDIUM: 8 packages (Tomcat, Commons Codec, Jetty) +🟡 LOW: 8 packages (Legacy MySQL, Deprecated versions) +``` + +### Supported Dependency Configurations + +All 18+ Gradle dependency configurations: +- `implementation`, `api`, `compile`, `compileOnly` +- `runtime`, `runtimeOnly` +- `testImplementation`, `testCompile`, `testCompileOnly`, `testRuntimeOnly` +- `debugImplementation`, `releaseImplementation` +- `annotationProcessor`, `classpath`, `kapt`, `ksp` +- `compileOnlyApi`, `testFixturesImplementation`, `testFixturesApi` +- `lintChecks` + +--- + +## Test Execution + +### Run Gradle Parser Tests +```bash +cd c:/repository/manifest-parser +go test ./internal/parsers/gradle/ -v +``` + +### Parse Individual Files +```bash +# Groovy DSL +go run cmd/main.go test/resources/build.gradle + +# Kotlin DSL +go run cmd/main.go test/resources/build.gradle.kts + +# With version catalog +go run cmd/main.go test/resources/build.gradle.kts +# Parser automatically discovers gradle/libs.versions.toml +``` + +### Expected Output +```json +[ + { + "packageManager": "gradle", + "packageName": "org.apache.logging.log4j:log4j-core", + "version": "2.14.0", + "filePath": "test/resources/build.gradle" + }, + { + "packageManager": "gradle", + "packageName": "org.springframework:spring-core", + "version": "5.2.0.RELEASE", + "filePath": "test/resources/build.gradle" + }, + ... +] +``` + +--- + +## Security Notes + +⚠️ **IMPORTANT:** These test files contain intentionally vulnerable dependency versions for testing purposes. + +**DO NOT USE IN PRODUCTION** without: +1. Updating all CRITICAL and HIGH severity packages +2. Upgrading to patched versions +3. Running security audits +4. Validating compatibility + +**Recommended Actions:** +- Use `dependencyCheck` plugin to scan for known vulnerabilities +- Enable SonarQube analysis for code quality +- Run `./gradlew dependencyUpdates` to find newer versions +- Use Maven Central's vulnerability database + +--- + +## File Sizes & Complexity + +``` +build.gradle 3.1 KB (15 dependencies) +build.gradle.kts 13.5 KB (40+ dependencies) +gradle.properties 2.0 KB (40+ property definitions) +gradle/libs.versions.toml 9.7 KB (80+ catalog entries) +───────────────────────────────────────────────────────── +TOTAL 28.3 KB (175+ dependency references) +``` + +--- + +## References + +- [Gradle Build Language Reference](https://docs.gradle.org/current/userguide/declaring_dependencies.html) +- [Gradle Version Catalogs](https://docs.gradle.org/current/userguide/platforms.html) +- [Spring Boot Version Reference](https://spring.io/projects/spring-boot/releases/) +- [NIST CVE Database](https://nvd.nist.gov/vuln) +- [Gradle Dependency Check Plugin](https://plugins.gradle.org/plugin/com.github.dependency-check.gradle) diff --git a/test/resources/build.gradle b/test/resources/build.gradle new file mode 100644 index 0000000..095d0e0 --- /dev/null +++ b/test/resources/build.gradle @@ -0,0 +1,122 @@ +plugins { + id 'java' + id 'application' + id 'jacoco' + id 'checkstyle' + id 'org.springframework.boot' version '2.5.0' apply false + id 'io.spring.dependency-management' version '1.0.11.RELEASE' +} + +group = 'com.example.securitytest' +version = '1.0.0' + +java { + toolchain { + languageVersion = JavaLanguageVersion.of(11) + } +} + +repositories { + mavenCentral() +} + +ext { + springBootVersion = '2.5.0' +} + +subprojects { + apply plugin: 'java' + apply plugin: 'jacoco' + + repositories { + mavenCentral() + } + + dependencies { + + // ========================= + // 🔴 CRITICAL vulnerabilities + // ========================= + implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // Log4Shell + implementation 'commons-collections:commons-collections:3.2.1' // deserialization vuln + + // ========================= + // 🔥 HIGH vulnerabilities + // ========================= + implementation 'org.springframework:spring-web:5.2.0.RELEASE' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.8' + implementation 'org.hibernate:hibernate-core:5.4.0.Final' + + // ========================= + // ⚠️ MEDIUM vulnerabilities + // ========================= + implementation 'org.apache.httpcomponents:httpclient:4.5.5' + implementation 'com.google.guava:guava:23.0' + implementation 'org.apache.tomcat.embed:tomcat-embed-core:9.0.10' + + // ========================= + // 🟡 LOW vulnerabilities + // ========================= + implementation 'junit:junit:4.12' + implementation 'org.slf4j:slf4j-api:1.7.25' + implementation 'ch.qos.logback:logback-classic:1.2.3' + + // ========================= + // Database + // ========================= + implementation 'mysql:mysql-connector-java:5.1.40' + + // ========================= + // Testing + // ========================= + testImplementation 'org.mockito:mockito-core:2.23.0' + } + + tasks.withType(Test) { + useJUnitPlatform() + } +} + +// ========================= +// Application Module Example +// ========================= +project(':app') { + apply plugin: 'org.springframework.boot' + + dependencies { + implementation project(':core') + implementation "org.springframework.boot:spring-boot-starter-web:${springBootVersion}" + } +} + +// ========================= +// Core Module +// ========================= +project(':core') { + dependencies { + implementation 'org.apache.commons:commons-lang3:3.8' + } +} + +// ========================= +// Security Module +// ========================= +project(':security') { + dependencies { + implementation 'org.springframework.security:spring-security-core:5.4.0' + } +} + +// ========================= +// Jacoco config +// ========================= +jacoco { + toolVersion = "0.8.7" +} + +tasks.jacocoTestReport { + reports { + xml.required = true + html.required = true + } +} \ No newline at end of file diff --git a/test/resources/build.gradle.kts b/test/resources/build.gradle.kts new file mode 100644 index 0000000..df8abd5 --- /dev/null +++ b/test/resources/build.gradle.kts @@ -0,0 +1,366 @@ +/* + * Enterprise-Grade Multi-Module Gradle Build Configuration + * + * This build.gradle.kts demonstrates: + * - Kotlin DSL dependency declarations + * - Variable resolution from gradle.properties + * - Platform/BOM dependencies + * - Version catalog references (with libs.versions.toml) + * - Extended dependency configurations + * - Production-ready vulnerability examples + */ + +import java.time.Instant + +plugins { + kotlin("jvm") version "1.6.21" apply false + id("org.springframework.boot") version "2.7.0" apply false + id("io.spring.dependency-management") version "1.0.11.RELEASE" + id("org.sonarqube") version "3.4.0.2513" apply false + id("jacoco") + id("checkstyle") +} + +group = "com.enterprise.platform" +version = "3.1.0" + +repositories { + mavenCentral() + google() + maven(url = "https://plugins.gradle.org/m2/") +} + +/** + * Configure all subprojects with common settings + */ +subprojects { + apply(plugin = "java") + apply(plugin = "jacoco") + apply(plugin = "checkstyle") + + java { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 + toolchain { + languageVersion.set(JavaLanguageVersion.of(11)) + } + } + + repositories { + mavenCentral() + google() + } + + dependencyManagement { + imports { + mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}") + mavenBom("com.google.cloud:libraries-bom:${property("googleCloudBomVersion")}") + } + } + + dependencies { + // =============================================================== + // 🔴 CRITICAL VULNERABILITIES - MUST BE REMEDIATED + // =============================================================== + // CVE-2021-44228 (Log4j RCE) - Apache Log4j 2.14.0 + // DO NOT USE IN PRODUCTION + implementation("org.apache.logging.log4j:log4j-core:2.14.0") + + // CVE-2015-4852 (Deserialization RCE) - Commons Collections 3.2.1 + // Gadget chain exploitable with certain frameworks + implementation("commons-collections:commons-collections:3.2.1") + + // =============================================================== + // 🔥 HIGH VULNERABILITIES - SHOULD UPGRADE + // =============================================================== + // CVE-2019-2725 (RCE) - Spring Framework 5.2.0 + // Improper validation in Spring Core + implementation("org.springframework:spring-core:5.2.0.RELEASE") + + // CVE-2020-5410 (Arbitrary File Write) - Jackson Databind 2.9.8 + // Multiple polymorphic deserialization gadgets + implementation("com.fasterxml.jackson.core:jackson-databind:2.9.8") + + // CVE-2019-12384 (Deserialization RCE) - XStream 1.4.17 + // Unsafe unmarshalling of XML data + implementation("com.thoughtworks.xstream:xstream:1.4.17") + + // CVE-2019-2725 (SQL Injection) - Hibernate 5.4.0 + // HQL injection via eager initialization of associations + implementation("org.hibernate:hibernate-core:5.4.0.Final") + + // =============================================================== + // ⚠️ MEDIUM VULNERABILITIES - PLAN UPGRADES + // =============================================================== + // CVE-2021-21341 (XXE) - org.springframework.security 5.4.0 + // XML External Entity vulnerability in XML parsing + implementation("org.springframework.security:spring-security-core:5.4.0") + + // CVE-2019-9740 (DoS) - Apache HttpClient 4.5.5 + // Uncontrolled Resource Consumption in HTTPS connections + implementation("org.apache.httpcomponents:httpclient:4.5.5") + + // CVE-2018-14335 (Missing bounds check) - Guava 23.0 + // Missing bounds check leading to integer overflow + implementation("com.google.guava:guava:23.0") + + // CVE-2019-1010022 (Buffer Overflow) - Logback 1.2.3 + // Improper input validation in configuration parsing + implementation("ch.qos.logback:logback-classic:1.2.3") + + // =============================================================== + // 🟡 LOW VULNERABILITIES - MONITOR + // =============================================================== + // CVE-2020-1938 (AJP Ghostcat) - Tomcat Embed 9.0.10 + // Arbitrary file read/write via AJP protocol + implementation("org.apache.tomcat.embed:tomcat-embed-core:9.0.10") + + // CVE-2020-13956 (DoS) - Apache Commons Codec 1.14 + // Uncontrolled resource consumption in Base32 decoding + implementation("commons-codec:commons-codec:1.14") + + // CVE-2020-17527 (Path Traversal) - Jetty 9.4.38 + // URI path traversal via encoded characters + implementation("org.eclipse.jetty:jetty-server:9.4.38.v20210224") + + // =============================================================== + // DATABASE DRIVERS + // =============================================================== + // Production-grade: PostgreSQL (Recommended over MySQL for security) + implementation("org.postgresql:postgresql:${property("postgresqlVersion")}") + + // Legacy MySQL (deprecated in favor of PostgreSQL) + implementation("mysql:mysql-connector-java:5.1.40") + + // In-memory testing database + testImplementation("com.h2database:h2:${property("h2Version")}") + + // =============================================================== + // TESTING FRAMEWORKS + // =============================================================== + testImplementation("junit:junit:${property("junitVersion")}") + testImplementation("org.mockito:mockito-core:${property("mockitoVersion")}") + testImplementation("org.assertj:assertj-core:${property("assertjVersion")}") + testImplementation("org.testng:testng:${property("testngVersion")}") + + // =============================================================== + // QUALITY & OBSERVABILITY + // =============================================================== + implementation("org.slf4j:slf4j-api:${property("slf4jVersion")}") + + // Annotation processing + annotationProcessor("org.projectlombok:lombok:1.18.24") + testAnnotationProcessor("org.projectlombok:lombok:1.18.24") + } + + // Configure Checkstyle + checkstyle { + toolVersion = "10.2" + configFile = file("${rootProject.projectDir}/checkstyle.xml") + } + + // Configure JaCoCo + jacoco { + toolVersion = "0.8.8" + } + + tasks.jacocoTestReport { + reports { + xml.required.set(true) + html.required.set(true) + csv.required.set(false) + } + } + + tasks.test { + useJUnitPlatform() + finalizedBy(tasks.jacocoTestReport) + } +} + +/** + * Core API Module + * Contains shared business logic and data access layer + */ +project(":core-api") { + apply(plugin = "org.springframework.boot") + apply(plugin = "kotlin") + + dependencies { + // Spring Framework Core + implementation("org.springframework.boot:spring-boot-starter-web") + implementation("org.springframework.boot:spring-boot-starter-data-jpa") + implementation("org.springframework.boot:spring-boot-starter-validation") + + // Spring Security (vulnerable version) + implementation("org.springframework.security:spring-security-core:${property("springSecurityVersion")}") + + // Kotlin Support + implementation(kotlin("stdlib-jdk11")) + implementation(kotlin("reflect")) + } +} + +/** + * Security Module + * Contains authentication and authorization logic + */ +project(":security-module") { + apply(plugin = "org.springframework.boot") + + dependencies { + implementation(project(":core-api")) + + // Spring Security stack + implementation("org.springframework.security:spring-security-core:${property("springSecurityVersion")}") + implementation("org.springframework.security:spring-security-crypto:${property("springSecurityVersion")}") + implementation("org.springframework.security:spring-security-web:${property("springSecurityVersion")}") + + // JWT/OAuth2 + implementation("io.jsonwebtoken:jjwt:0.11.5") + + // LDAP Integration + implementation("org.springframework.security:spring-security-ldap:${property("springSecurityVersion")}") + } +} + +/** + * Data Module + * Database access and persistence layer + */ +project(":data-module") { + apply(plugin = "org.springframework.boot") + + dependencies { + implementation(project(":core-api")) + + // Spring Data + implementation("org.springframework.boot:spring-boot-starter-data-jpa") + implementation("org.springframework.boot:spring-boot-starter-data-rest") + + // Hibernate (vulnerable version) + implementation("org.hibernate:hibernate-core:${property("hibernateVersion")}") + implementation("org.hibernate:hibernate-validator:${property("hibernateVersion")}") + + // Connection pooling + implementation("org.apache.commons:commons-dbcp2:2.9.0") + + // Liquibase for schema versioning + implementation("org.liquibase:liquibase-core:4.9.1") + } +} + +/** + * API Gateway Module + * REST API and external integrations + */ +project(":api-gateway") { + apply(plugin = "org.springframework.boot") + + dependencies { + implementation(project(":core-api")) + implementation(project(":security-module")) + + // Spring Cloud Gateway + implementation("org.springframework.cloud:spring-cloud-starter-gateway") + implementation("org.springframework.cloud:spring-cloud-starter-consul-discovery") + + // API Documentation + implementation("org.springdoc:springdoc-openapi-ui:1.6.9") + + // HTTP Client (vulnerable version) + implementation("org.apache.httpcomponents:httpclient:${property("commonsHttpClientVersion")}") + } +} + +/** + * Monitoring Module + * Metrics, logging, and health checks + */ +project(":monitoring-module") { + apply(plugin = "org.springframework.boot") + + dependencies { + // Spring Boot Actuator + implementation("org.springframework.boot:spring-boot-starter-actuator") + + // Micrometer metrics + implementation("io.micrometer:micrometer-registry-prometheus:1.9.1") + + // Logging (Log4j vulnerable version + fallback) + implementation("org.apache.logging.log4j:log4j-api:${property("log4jVersion")}") + implementation("org.apache.logging.log4j:log4j-core:${property("log4jCoreVersion")}") + implementation("org.slf4j:slf4j-log4j12:${property("slf4jVersion")}") + + // Structured logging + implementation("net.logstash.logback:logstash-logback-encoder:7.2") + } +} + +/** + * Advanced Configurations using Platform/BOM + */ +configure(subprojects.filter { it.name in listOf("api-gateway", "data-module") }) { + dependencies { + // Google Cloud Platform integration + implementation(platform("com.google.cloud:libraries-bom:${property("googleCloudBomVersion")}")) + implementation("com.google.cloud:google-cloud-storage") + implementation("com.google.cloud:google-cloud-pubsub") + } +} + +/** + * Extended Dependency Configurations for Android modules (if applicable) + */ +configure(subprojects.filter { it.name.contains("android") }) { + dependencies { + debugImplementation("com.facebook.stetho:stetho:1.6.0") + debugImplementation("com.facebook.stetho:stetho-okhttp3:1.6.0") + + releaseImplementation("com.google.firebase:firebase-crashlytics:18.0.0") + releaseImplementation("com.google.firebase:firebase-analytics:21.1.1") + + // Code generation for Android + ksp("com.google.dagger:dagger-compiler:2.42") + } +} + +/** + * Root Project Tasks + */ +tasks { + val buildInfo = register("buildInfo") { + doLast { + println(""" + ╔════════════════════════════════════════════════════════════════════╗ + ║ ENTERPRISE BUILD CONFIGURATION ║ + ║ ║ + ║ Project: ${project.group} ║ + ║ Version: ${project.version} ║ + ║ Java: ${java.sourceCompatibility} ║ + ║ Built: ${Instant.now()} ║ + ║ ║ + ║ ⚠️ SECURITY NOTICE: ║ + ║ This build contains known vulnerabilities for testing purposes ║ + ║ DO NOT USE IN PRODUCTION without remediation ║ + ║ ║ + ╚════════════════════════════════════════════════════════════════════╝ + """.trimIndent()) + } + } + + build { + dependsOn(buildInfo) + } +} + +// Configure SonarQube analysis +sonarqube { + properties { + property("sonar.projectKey", "enterprise-platform") + property("sonar.projectName", "Enterprise Platform") + property("sonar.sources", "src/main") + property("sonar.tests", "src/test") + property("sonar.coverage.jacoco.xmlReportPaths", "**/target/site/jacoco/jacoco.xml") + } +} diff --git a/test/resources/build.sbt b/test/resources/build.sbt new file mode 100644 index 0000000..986bb8b --- /dev/null +++ b/test/resources/build.sbt @@ -0,0 +1,11 @@ +name := "demo" +scalaVersion := "2.13.12" + +val akkaVersion = "2.8.5" + +libraryDependencies ++= Seq( + "org.scala-lang" % "scala-library" % "2.13.12", + "com.typesafe.akka" %% "akka-actor" % akkaVersion, + "org.scalatest" %% "scalatest" % "3.2.18" % Test, + "ch.qos.logback" % "logback-classic" % "1.4.14" +) diff --git a/test/resources/gradle.properties b/test/resources/gradle.properties new file mode 100644 index 0000000..7b1242c --- /dev/null +++ b/test/resources/gradle.properties @@ -0,0 +1,88 @@ +# ========================== +# Central Gradle Properties +# ========================== +# This file is shared across all gradle modules +# Properties can be overridden in subproject gradle.properties + +# ======================== +# Organization Settings +# ======================== +org.gradle.parallel=true +org.gradle.caching=true +org.gradle.daemon=true +org.gradle.jvmargs=-Xmx2048m -XX:+UseG1GC + +# ======================== +# Java Version +# ======================== +javaVersion=11 +javaTargetVersion=11 + +# ======================== +# Framework Versions +# ======================== +springBootVersion=2.7.0 +springVersion=5.3.20 +springSecurityVersion=5.7.1 +springCloudVersion=2021.0.3 +hibernateVersion=5.6.10.Final +jacksonVersion=2.13.3 + +# ======================== +# Logging Versions +# ======================== +log4jVersion=2.17.1 +log4jCoreVersion=2.17.1 +slf4jVersion=1.7.36 +logbackVersion=1.2.11 + +# ======================== +# Apache Commons Versions +# ======================== +commonsLang3Version=3.12.0 +commonsCodecVersion=1.15 +commonsCollectionsVersion=3.2.2 +commonsHttpClientVersion=4.5.13 + +# ======================== +# Database Drivers +# ======================== +mysqlVersion=8.0.29 +postgresqlVersion=42.3.6 +h2Version=2.1.210 + +# ======================== +# JSON/XML Processing +# ======================== +guavaVersion=31.1-jre +gson=2.9.0 +xstreamVersion=1.4.18 + +# ======================== +# Testing Frameworks +# ======================== +junitVersion=4.13.2 +mockitoVersion=4.6.1 +assertjVersion=3.22.0 +testngVersion=7.5 + +# ======================== +# Build & Quality Tools +# ======================== +jacocoVersion=0.8.8 +checkstyleVersion=10.2 +spotbugsVersion=4.7.2 +sonarVersion=3.4.0.2513 + +# ======================== +# Google Cloud Dependencies (BOM) +# ======================== +googleCloudBomVersion=26.1.0 + +# ======================== +# Maven Plugin Versions +# ======================== +mavenCompilerPluginVersion=3.10.1 +mavenSurefirePluginVersion=2.22.2 +mavenShadePluginVersion=3.2.4 +mavenAssemblyPluginVersion=3.3.0 diff --git a/test/resources/gradle/libs.versions.toml b/test/resources/gradle/libs.versions.toml new file mode 100644 index 0000000..1980d6d --- /dev/null +++ b/test/resources/gradle/libs.versions.toml @@ -0,0 +1,228 @@ +# ================================================================== +# Gradle Version Catalog - Central Dependency Management +# ================================================================== +# This file demonstrates the version catalog feature (Gradle 7.0+) +# References: https://docs.gradle.org/current/userguide/platforms.html + +[versions] +# Spring Framework +spring-version = "5.3.20" +spring-boot-version = "2.7.0" +spring-security-version = "5.7.1" +spring-cloud-version = "2021.0.3" + +# Java & Kotlin +java-version = "11" +kotlin-version = "1.6.21" +gradle-kotlin-dsl-version = "0.4.0" + +# Logging & Observability +slf4j-version = "1.7.36" +logback-version = "1.2.11" +log4j-version = "2.17.1" + +# Testing +junit-version = "4.13.2" +mockito-version = "4.6.1" +assertj-version = "3.22.0" + +# Database +hibernate-version = "5.6.10.Final" +postgresql-version = "42.3.6" +h2-version = "2.1.210" + +# JSON/XML +jackson-version = "2.13.3" +gson-version = "2.9.0" + +# Apache Commons +commons-lang3-version = "3.12.0" +commons-codec-version = "1.15" +commons-collections-version = "3.2.2" + +# Google Libraries +guava-version = "31.1-jre" +google-cloud-bom-version = "26.1.0" + +# Build Tools +jacoco-version = "0.8.8" +checkstyle-version = "10.2" +spotbugs-version = "4.7.2" + +# BOM Versions +spring-cloud-bom-version = "2021.0.3" + +[libraries] +# ================================================================== +# Spring Framework Libraries +# ================================================================== +spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" } +spring-web = { module = "org.springframework:spring-web", version.ref = "spring-version" } +spring-context = { module = "org.springframework:spring-context", version.ref = "spring-version" } +spring-orm = { module = "org.springframework:spring-orm", version.ref = "spring-version" } + +spring-boot-starter-web = { module = "org.springframework.boot:spring-boot-starter-web", version.ref = "spring-boot-version" } +spring-boot-starter-data-jpa = { module = "org.springframework.boot:spring-boot-starter-data-jpa", version.ref = "spring-boot-version" } +spring-boot-starter-security = { module = "org.springframework.boot:spring-boot-starter-security", version.ref = "spring-boot-version" } +spring-boot-starter-actuator = { module = "org.springframework.boot:spring-boot-starter-actuator", version.ref = "spring-boot-version" } +spring-boot-starter-validation = { module = "org.springframework.boot:spring-boot-starter-validation", version.ref = "spring-boot-version" } +spring-boot-starter-logging = { module = "org.springframework.boot:spring-boot-starter-logging", version.ref = "spring-boot-version" } + +spring-security-core = { module = "org.springframework.security:spring-security-core", version.ref = "spring-security-version" } +spring-security-web = { module = "org.springframework.security:spring-security-web", version.ref = "spring-security-version" } +spring-security-crypto = { module = "org.springframework.security:spring-security-crypto", version.ref = "spring-security-version" } + +spring-cloud-starter-gateway = { module = "org.springframework.cloud:spring-cloud-starter-gateway", version.ref = "spring-cloud-version" } +spring-cloud-starter-consul-discovery = { module = "org.springframework.cloud:spring-cloud-starter-consul-discovery", version.ref = "spring-cloud-version" } + +# ================================================================== +# Logging & Observability +# ================================================================== +slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j-version" } +logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback-version" } +logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback-version" } + +# CRITICAL VULNERABILITY: Log4j RCE (CVE-2021-44228) +log4j-api = { module = "org.apache.logging.log4j:log4j-api", version.ref = "log4j-version" } +log4j-core = { module = "org.apache.logging.log4j:log4j-core", version.ref = "log4j-version" } + +micrometer-registry-prometheus = "io.micrometer:micrometer-registry-prometheus:1.9.1" +logstash-logback-encoder = "net.logstash.logback:logstash-logback-encoder:7.2" + +# ================================================================== +# Database & ORM +# ================================================================== +hibernate-core = { module = "org.hibernate:hibernate-core", version.ref = "hibernate-version" } +hibernate-validator = { module = "org.hibernate:hibernate-validator", version.ref = "hibernate-version" } + +postgresql = { module = "org.postgresql:postgresql", version.ref = "postgresql-version" } +h2-database = { module = "com.h2database:h2", version.ref = "h2-version" } + +# MEDIUM VULNERABILITY: MySQL 5.1 (Legacy, prefer PostgreSQL) +mysql-connector = "mysql:mysql-connector-java:5.1.40" + +liquibase-core = "org.liquibase:liquibase-core:4.9.1" +commons-dbcp2 = "org.apache.commons:commons-dbcp2:2.9.0" + +# ================================================================== +# JSON/XML & Serialization +# ================================================================== +jackson-databind = { module = "com.fasterxml.jackson.core:jackson-databind", version.ref = "jackson-version" } +jackson-annotations = { module = "com.fasterxml.jackson.core:jackson-annotations", version.ref = "jackson-version" } +jackson-dataformat-xml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-xml", version.ref = "jackson-version" } + +gson = { module = "com.google.gson:gson", version.ref = "gson-version" } + +# HIGH VULNERABILITY: XStream (Deserialization RCE) +xstream = "com.thoughtworks.xstream:xstream:1.4.17" + +# ================================================================== +# Apache Commons (Known Vulnerabilities) +# ================================================================== +commons-lang3 = { module = "org.apache.commons:commons-lang3", version.ref = "commons-lang3-version" } +commons-codec = { module = "commons-codec:commons-codec", version.ref = "commons-codec-version" } + +# CRITICAL VULNERABILITY: Commons Collections 3.2.1 (Gadget chain RCE) +commons-collections = { module = "commons-collections:commons-collections", version.ref = "commons-collections-version" } + +# HIGH VULNERABILITY: HttpClient 4.5.5 (DoS via HTTPS) +httpclient = { module = "org.apache.httpcomponents:httpclient", version.ref = "commons-codec-version" } + +# ================================================================== +# Google Libraries +# ================================================================== +guava = { module = "com.google.guava:guava", version.ref = "guava-version" } +google-cloud-storage = "com.google.cloud:google-cloud-storage" +google-cloud-pubsub = "com.google.cloud:google-cloud-pubsub" + +# ================================================================== +# Testing Frameworks +# ================================================================== +junit = { module = "junit:junit", version.ref = "junit-version" } +mockito-core = { module = "org.mockito:mockito-core", version.ref = "mockito-version" } +assertj-core = { module = "org.assertj:assertj-core", version.ref = "assertj-version" } + +# ================================================================== +# Code Generation & Annotation Processing +# ================================================================== +lombok = "org.projectlombok:lombok:1.18.24" +dagger-compiler = "com.google.dagger:dagger-compiler:2.42" + +# ================================================================== +# Android/Debug Only Dependencies +# ================================================================== +stetho = "com.facebook.stetho:stetho:1.6.0" +stetho-okhttp3 = "com.facebook.stetho:stetho-okhttp3:1.6.0" + +# ================================================================== +# Firebase & Analytics (Release builds) +# ================================================================== +firebase-crashlytics = "com.google.firebase:firebase-crashlytics:18.0.0" +firebase-analytics = "com.google.firebase:firebase-analytics:21.1.1" + +# ================================================================== +# API Documentation +# ================================================================== +springdoc-openapi-ui = "org.springdoc:springdoc-openapi-ui:1.6.9" +springdoc-openapi-kotlin = "org.springdoc:springdoc-openapi-kotlin:1.6.9" + +# ================================================================== +# JWT & OAuth2 +# ================================================================== +jjwt = "io.jsonwebtoken:jjwt:0.11.5" +spring-security-oauth2 = "org.springframework.security.oauth:spring-security-oauth2:2.5.2.RELEASE" + +# ================================================================== +# Kotlin & Coroutines +# ================================================================== +kotlin-stdlib = { module = "org.jetbrains.kotlin:kotlin-stdlib-jdk11", version.ref = "kotlin-version" } +kotlin-reflect = { module = "org.jetbrains.kotlin:kotlin-reflect", version.ref = "kotlin-version" } +kotlin-coroutines = "org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.3" + +[bundles] +# ================================================================== +# Bundle Groups - Frequently Used Together +# ================================================================== +spring-core = [ + "spring-core", + "spring-context", + "spring-web" +] + +spring-boot-web = [ + "spring-boot-starter-web", + "spring-boot-starter-validation", + "spring-boot-starter-logging" +] + +spring-data-stack = [ + "spring-boot-starter-data-jpa", + "hibernate-core", + "hibernate-validator" +] + +spring-security-stack = [ + "spring-boot-starter-security", + "spring-security-core", + "spring-security-web", + "spring-security-crypto" +] + +logging-stack = [ + "slf4j-api", + "logback-classic", + "logback-core", + "logstash-logback-encoder" +] + +testing = [ + "junit", + "mockito-core", + "assertj-core" +] + +json-processing = [ + "jackson-databind", + "jackson-annotations", + "gson" +]