diff --git a/grype/db/internal/provider/unmarshal/osv_vulnerability.go b/grype/db/internal/provider/unmarshal/osv_vulnerability.go index 0fa38e21be0..74408e84e26 100644 --- a/grype/db/internal/provider/unmarshal/osv_vulnerability.go +++ b/grype/db/internal/provider/unmarshal/osv_vulnerability.go @@ -6,7 +6,16 @@ import ( "github.com/google/osv-scanner/pkg/models" ) -type OSVVulnerability = models.Vulnerability +// OSVVulnerability extends models.Vulnerability with the OSV 1.7+ `upstream` +// field, which osv-scanner v1.9.2 doesn't model. Embedding keeps this a +// one-field extension rather than switching to +// github.com/ossf/osv-schema/bindings/go/osvschema (which does have Upstream). +// JSON decoding flattens correctly because models.Vulnerability has no custom +// UnmarshalJSON. +type OSVVulnerability struct { + models.Vulnerability + Upstream []string `json:"upstream,omitempty"` +} func OSVVulnerabilityEntries(reader io.Reader) ([]OSVVulnerability, error) { return unmarshalSingleOrMulti[OSVVulnerability](reader) diff --git a/grype/db/v6/build/transformers/osv/testdata/upstream-only-CVE-2024-99999.json b/grype/db/v6/build/transformers/osv/testdata/upstream-only-CVE-2024-99999.json new file mode 100644 index 00000000000..a6abf85a498 --- /dev/null +++ b/grype/db/v6/build/transformers/osv/testdata/upstream-only-CVE-2024-99999.json @@ -0,0 +1,28 @@ +{ + "schema_version": "1.7.0", + "id": "ECHO-upstream-only-2024-99999", + "details": "Cross-reference appears only via the OSV 1.7 upstream field.", + "upstream": [ + "CVE-2024-99999" + ], + "affected": [ + { + "package": { + "ecosystem": "npm", + "name": "fictional-pkg", + "purl": "pkg:npm/fictional-pkg" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + {"introduced": "1.0.0"}, + {"fixed": "1.2.0"} + ] + } + ] + } + ], + "published": "2024-03-01T00:00:00Z", + "modified": "2024-03-02T00:00:00Z" +} diff --git a/grype/db/v6/build/transformers/osv/testdata/upstream-overlap-2024-12345.json 
b/grype/db/v6/build/transformers/osv/testdata/upstream-overlap-2024-12345.json new file mode 100644 index 00000000000..73a4a873c22 --- /dev/null +++ b/grype/db/v6/build/transformers/osv/testdata/upstream-overlap-2024-12345.json @@ -0,0 +1,32 @@ +{ + "schema_version": "1.7.0", + "id": "ECHO-overlap-2024-12345", + "details": "aliases and upstream overlap on the same CVE; upstream also carries a GHSA.", + "aliases": [ + "CVE-2024-12345" + ], + "upstream": [ + "CVE-2024-12345", + "GHSA-aaaa-bbbb-cccc" + ], + "affected": [ + { + "package": { + "ecosystem": "npm", + "name": "fictional-pkg", + "purl": "pkg:npm/fictional-pkg" + }, + "ranges": [ + { + "type": "SEMVER", + "events": [ + {"introduced": "2.0.0"}, + {"fixed": "2.1.0"} + ] + } + ] + } + ], + "published": "2024-04-01T00:00:00Z", + "modified": "2024-04-02T00:00:00Z" +} diff --git a/grype/db/v6/build/transformers/osv/transform.go b/grype/db/v6/build/transformers/osv/transform.go index b172adc884e..6ce4a5e5e9b 100644 --- a/grype/db/v6/build/transformers/osv/transform.go +++ b/grype/db/v6/build/transformers/osv/transform.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/google/osv-scanner/pkg/models" + "github.com/scylladb/go-set/strset" "github.com/anchore/grype/grype/db/data" "github.com/anchore/grype/grype/db/internal/codename" @@ -32,11 +33,14 @@ func Transform(vulnerability unmarshal.OSVVulnerability, state provider.State) ( } isAdvisory := isAdvisoryRecord(vulnerability) - aliases := vulnerability.Aliases - + var related []string if isAdvisory { - aliases = append(aliases, vulnerability.Related...) + related = vulnerability.Related } + // Upstream is the OSV 1.7+ field carrying CVE/GHSA cross-references for + // records derived from another vulnerability. Several producers use it + // instead of (or in addition to) aliases. 
+ aliases := mergeUniqueStrings(vulnerability.Aliases, vulnerability.Upstream, related) in := []any{ db.VulnerabilityHandle{ @@ -692,3 +696,20 @@ func createUnaffectedRange(fixedVersion string, fixByVersion map[string]db.FixAv }, } } + +// mergeUniqueStrings concatenates groups in order, skipping empty values and +// any later occurrence of a value already seen. +func mergeUniqueStrings(groups ...[]string) []string { + seen := strset.New() + var out []string + for _, g := range groups { + for _, s := range g { + if s == "" || seen.Has(s) { + continue + } + seen.Add(s) + out = append(out, s) + } + } + return out +} diff --git a/grype/db/v6/build/transformers/osv/transform_test.go b/grype/db/v6/build/transformers/osv/transform_test.go index a5046190bec..f6c0c4cbc3d 100644 --- a/grype/db/v6/build/transformers/osv/transform_test.go +++ b/grype/db/v6/build/transformers/osv/transform_test.go @@ -281,6 +281,85 @@ func TestTransform(t *testing.T) { ), }}, }, + { + name: "Upstream only", + fixturePath: "testdata/upstream-only-CVE-2024-99999.json", + want: []transformers.RelatedEntries{{ + VulnerabilityHandle: &db.VulnerabilityHandle{ + Name: "ECHO-upstream-only-2024-99999", + Status: db.VulnerabilityActive, + ProviderID: "osv", + Provider: expectedProvider(), + ModifiedDate: timeRef(time.Date(2024, time.March, 2, 0, 0, 0, 0, time.UTC)), + PublishedDate: timeRef(time.Date(2024, time.March, 1, 0, 0, 0, 0, time.UTC)), + BlobValue: &db.VulnerabilityBlob{ + ID: "ECHO-upstream-only-2024-99999", + Description: "Cross-reference appears only via the OSV 1.7 upstream field.", + Aliases: []string{"CVE-2024-99999"}, + }, + }, + Related: affectedPkgSlice( + db.AffectedPackageHandle{ + Package: &db.Package{ + Name: "fictional-pkg", + Ecosystem: "npm", + }, + BlobValue: &db.PackageBlob{ + Ranges: []db.Range{{ + Version: db.Version{ + Type: "semver", + Constraint: ">=1.0.0,<1.2.0", + }, + Fix: &db.Fix{ + Version: "1.2.0", + State: db.FixedStatus, + }, + }}, + }, + }, + ), + }}, + }, + { + 
name: "Upstream + Aliases overlap dedup", + fixturePath: "testdata/upstream-overlap-2024-12345.json", + want: []transformers.RelatedEntries{{ + VulnerabilityHandle: &db.VulnerabilityHandle{ + Name: "ECHO-overlap-2024-12345", + Status: db.VulnerabilityActive, + ProviderID: "osv", + Provider: expectedProvider(), + ModifiedDate: timeRef(time.Date(2024, time.April, 2, 0, 0, 0, 0, time.UTC)), + PublishedDate: timeRef(time.Date(2024, time.April, 1, 0, 0, 0, 0, time.UTC)), + BlobValue: &db.VulnerabilityBlob{ + ID: "ECHO-overlap-2024-12345", + Description: "aliases and upstream overlap on the same CVE; upstream also carries a GHSA.", + Aliases: []string{"CVE-2024-12345", "GHSA-aaaa-bbbb-cccc"}, + }, + }, + Related: affectedPkgSlice( + db.AffectedPackageHandle{ + Package: &db.Package{ + Name: "fictional-pkg", + Ecosystem: "npm", + }, + BlobValue: &db.PackageBlob{ + CVEs: []string{"CVE-2024-12345"}, + Ranges: []db.Range{{ + Version: db.Version{ + Type: "semver", + Constraint: ">=2.0.0,<2.1.0", + }, + Fix: &db.Fix{ + Version: "2.1.0", + State: db.FixedStatus, + }, + }}, + }, + }, + ), + }}, + }, } t.Parallel() for _, testToRun := range tests { diff --git a/grype/db/v6/search_query.go b/grype/db/v6/search_query.go index 42c3b4ea160..0fb81b1128c 100644 --- a/grype/db/v6/search_query.go +++ b/grype/db/v6/search_query.go @@ -99,6 +99,9 @@ func (b *searchQueryBuilder) handleEcosystem(c *search.EcosystemCriteria) { // the v6 store normalizes ecosystems around the syft package type, so that field is preferred switch { + case c.ExactEcosystem != "": + // caller asked for a literal ecosystem string (e.g. 
vendor-prefixed "Echo:PyPi") + b.query.pkgSpec.Ecosystem = c.ExactEcosystem case c.PackageType != "" && c.PackageType != syftPkg.UnknownPkg: // prefer to match by a non-blank, known package type b.query.pkgType = c.PackageType diff --git a/grype/matcher/python/echo.go b/grype/matcher/python/echo.go new file mode 100644 index 00000000000..470a07fad97 --- /dev/null +++ b/grype/matcher/python/echo.go @@ -0,0 +1,150 @@ +package python + +import ( + "fmt" + "regexp" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/grype/grype/match" + "github.com/anchore/grype/grype/matcher/internal" + "github.com/anchore/grype/grype/matcher/internal/result" + "github.com/anchore/grype/grype/pkg" + "github.com/anchore/grype/grype/search" + "github.com/anchore/grype/grype/version" + "github.com/anchore/grype/grype/vulnerability" +) + +// echoPatchedVersionMarker matches the PEP 440 local-version segment Echo adds +// to its patched python packages (e.g. "5.2.1+echo.1"). When this marker is +// present on an installed package, the python matcher also searches the +// "Echo:PyPi" ecosystem so Echo's vulnerability records are considered, and +// uses Echo's records to suppress upstream CVEs/GHSAs Echo has backported. +// Non-Echo Python installs are unaffected: no marker, no extra work. +var echoPatchedVersionMarker = regexp.MustCompile(`\+echo\.\d+`) + +const echoPythonEcosystem = "Echo:PyPi" + +func hasEchoPatchMarker(v string) bool { + return echoPatchedVersionMarker.MatchString(v) +} + +// matchEchoPython surfaces Echo vulnerability records that affect the installed +// version (i.e., installed < Echo fix). 
+func matchEchoPython(store vulnerability.Provider, p pkg.Package, matcherType match.MatcherType) ([]match.Match, []match.IgnoreFilter, error) {
+	var matches []match.Match
+	var ignored []match.IgnoreFilter
+
+	// Run one Echo query per search name the store reports for this package.
+	// Results are concatenated in that order; the first failing query aborts
+	// the whole lookup and no partial results are returned.
+	for _, packageName := range store.PackageSearchNames(p) {
+		nameMatches, nameIgnores, err := matchEchoByName(store, p, packageName, matcherType)
+		if err != nil {
+			return nil, nil, err
+		}
+		matches = append(matches, nameMatches...)
+		ignored = append(ignored, nameIgnores...)
+	}
+	return matches, ignored, nil
+}
+
+// matchEchoByName runs a single Echo lookup for packageName in the literal
+// "Echo:PyPi" ecosystem and converts the results into matches attributed to
+// matcherType. The ignore-filter result is always nil; it is part of the
+// signature only so callers can treat it like the other matcher helpers.
+func matchEchoByName(vp vulnerability.Provider, p pkg.Package, packageName string, matcherType match.MatcherType) ([]match.Match, []match.IgnoreFilter, error) {
+	provider := result.NewProvider(vp, p, matcherType)
+	criteria := []vulnerability.Criteria{
+		search.ByExactEcosystem(echoPythonEcosystem),
+		search.ByPackageName(packageName),
+		internal.OnlyQualifiedPackages(p),
+		internal.OnlyVulnerableVersions(version.New(p.Version, pkg.VersionFormat(p))),
+		internal.OnlyNonWithdrawnVulnerabilities(),
+	}
+	disclosures, err := provider.FindResults(criteria...)
+	if err != nil {
+		// Report the name that was actually searched: packageName is one of
+		// possibly several search names and may differ from p.Name.
+		return nil, nil, fmt.Errorf("echo matcher failed to fetch disclosures for %q: %w", packageName, err)
+	}
+	return disclosures.ToMatches(), nil, nil
+}
+
+// echoSuppressionIDs returns the set of upstream CVE/GHSA identifiers that
+// Echo has backported a fix for AND the installed version is at-or-above the
+// Echo fix. Callers use this to drop upstream matches that Echo has already
+// patched on this system.
+//
+// We query every Echo:PyPi record for the package — not just versions matching
+// the installed package — then keep those whose fix version is <= installed.
+// Each such record's aliases (originally Echo's `upstream` field) are added to
+// the suppression set.
+func echoSuppressionIDs(store vulnerability.Provider, p pkg.Package) (*strset.Set, error) {
+	installed := version.New(p.Version, pkg.VersionFormat(p))
+	suppressed := strset.New()
+
+	for _, packageName := range store.PackageSearchNames(p) {
+		records, err := findAllEchoRecords(store, p, packageName)
+		if err != nil {
+			return nil, err
+		}
+		for _, v := range records {
+			// Only a record whose fix is already installed may suppress
+			// anything; unfixed or not-yet-applied records are skipped.
+			if !echoFixApplied(v, installed) {
+				continue
+			}
+			// Collect the IDs of the record's related vulnerabilities; blank
+			// IDs carry no information and are ignored.
+			for _, related := range v.RelatedVulnerabilities {
+				if related.ID == "" {
+					continue
+				}
+				suppressed.Add(related.ID)
+			}
+		}
+	}
+	return suppressed, nil
+}
+
+// findAllEchoRecords returns every non-withdrawn "Echo:PyPi" record filed
+// under packageName that qualifies for p. Unlike matchEchoByName it applies no
+// version criterion, so records that no longer affect the installed version
+// are returned as well.
+func findAllEchoRecords(vp vulnerability.Provider, p pkg.Package, packageName string) ([]vulnerability.Vulnerability, error) {
+	criteria := []vulnerability.Criteria{
+		search.ByExactEcosystem(echoPythonEcosystem),
+		search.ByPackageName(packageName),
+		internal.OnlyQualifiedPackages(p),
+		internal.OnlyNonWithdrawnVulnerabilities(),
+	}
+	vulns, err := vp.FindVulnerabilities(criteria...)
+	if err != nil {
+		// Report the name that was actually searched: packageName is one of
+		// possibly several search names and may differ from p.Name.
+		return nil, fmt.Errorf("echo matcher failed to enumerate records for %q: %w", packageName, err)
+	}
+	return vulns, nil
+}
+
+// echoFixApplied reports whether the installed version is at or above at
+// least one of the fix versions recorded on v. A record without a usable fix
+// version (none listed, or only empty strings) never qualifies, so it cannot
+// act as a suppression source.
+func echoFixApplied(v vulnerability.Vulnerability, installed *version.Version) bool {
+	// Ranging over an empty list falls through to the final "return false",
+	// so no separate length check is needed.
+	for _, fixRaw := range v.Fix.Versions {
+		if fixRaw == "" {
+			continue
+		}
+		// Build the fix version with the installed version's format so both
+		// sides of the comparison use the same format.
+		fixVer := version.New(fixRaw, installed.Format)
+		cmp, err := installed.Compare(fixVer)
+		if err != nil {
+			// Best effort: a fix version that cannot be compared is skipped
+			// rather than failing the whole match.
+			continue
+		}
+		if cmp >= 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// dropSuppressedMatches removes any match whose primary ID is in the
+// suppression set.
+func dropSuppressedMatches(matches []match.Match, suppressed *strset.Set) []match.Match { + if suppressed == nil || suppressed.Size() == 0 { + return matches + } + out := matches[:0] + for _, m := range matches { + if suppressed.Has(m.Vulnerability.ID) { + continue + } + out = append(out, m) + } + return out +} diff --git a/grype/matcher/python/echo_test.go b/grype/matcher/python/echo_test.go new file mode 100644 index 00000000000..1f694c4b318 --- /dev/null +++ b/grype/matcher/python/echo_test.go @@ -0,0 +1,38 @@ +package python + +import "testing" + +func TestHasEchoPatchMarker(t *testing.T) { + cases := []struct { + version string + want bool + }{ + // positive: PEP 440 local segment with echo.N + {"5.2.1+echo.1", true}, + {"5.2.1+echo.42", true}, + {"0.0.0+echo.0", true}, + {"1!2.3.4+echo.7", true}, // PEP 440 epoch prefix + {"5.2.1.post1+echo.1", true}, + {"5.2.1-dev0+echo.2", true}, + {"5.2.1+ubuntu.1+echo.3", true}, // marker appears after another local label + + // negative: no echo marker + {"5.2.1", false}, + {"5.2.1+echo", false}, // missing .N + {"5.2.1+echo.", false}, // missing digits + {"5.2.1+echox.1", false}, // not exactly "echo" + {"5.2.1+ECHO.1", false}, // case-sensitive (PEP 440 local segments are lowercased) + {"5.2.1+local", false}, + {"5.2.1+echo1", false}, // missing dot + {"", false}, + } + + for _, tc := range cases { + t.Run(tc.version, func(t *testing.T) { + got := hasEchoPatchMarker(tc.version) + if got != tc.want { + t.Errorf("hasEchoPatchMarker(%q) = %v, want %v", tc.version, got, tc.want) + } + }) + } +} diff --git a/grype/matcher/python/matcher.go b/grype/matcher/python/matcher.go index 56111525318..072546ef90c 100644 --- a/grype/matcher/python/matcher.go +++ b/grype/matcher/python/matcher.go @@ -31,5 +31,25 @@ func (m *Matcher) Type() match.MatcherType { } func (m *Matcher) Match(store vulnerability.Provider, p pkg.Package) ([]match.Match, []match.IgnoreFilter, error) { - return internal.MatchPackageByEcosystemAndCPEs(store, p, 
m.Type(), m.cfg.UseCPEs) + matches, ignores, err := internal.MatchPackageByEcosystemAndCPEs(store, p, m.Type(), m.cfg.UseCPEs) + if err != nil { + return nil, nil, err + } + + if hasEchoPatchMarker(p.Version) { + suppressed, err := echoSuppressionIDs(store, p) + if err != nil { + return nil, nil, err + } + matches = dropSuppressedMatches(matches, suppressed) + + echoMatches, echoIgnores, err := matchEchoPython(store, p, m.Type()) + if err != nil { + return nil, nil, err + } + matches = append(matches, echoMatches...) + ignores = append(ignores, echoIgnores...) + } + + return matches, ignores, nil } diff --git a/grype/search/ecosystem.go b/grype/search/ecosystem.go index 17aa8e8df91..4bdc31b1cdb 100644 --- a/grype/search/ecosystem.go +++ b/grype/search/ecosystem.go @@ -17,9 +17,20 @@ func ByEcosystem(lang syftPkg.Language, t syftPkg.Type) vulnerability.Criteria { } } +// ByExactEcosystem returns criteria that searches with a literal ecosystem +// string as stored in the DB, bypassing syft Language/Type normalization. Use +// this for vendor-prefixed ecosystems (e.g. "Echo:PyPi") that aren't reachable +// via the standard package-type mapping. +func ByExactEcosystem(ecosystem string) vulnerability.Criteria { + return &EcosystemCriteria{ + ExactEcosystem: ecosystem, + } +} + type EcosystemCriteria struct { - Language syftPkg.Language - PackageType syftPkg.Type + Language syftPkg.Language + PackageType syftPkg.Type + ExactEcosystem string } func (c *EcosystemCriteria) MatchesVulnerability(value vulnerability.Vulnerability) (bool, string, error) {