diff --git a/alembic/versions/b4f8c1d2e3a4_add_registry_version_artifact_hash.py b/alembic/versions/b4f8c1d2e3a4_add_registry_version_artifact_hash.py new file mode 100644 index 0000000000..82d4e90a6f --- /dev/null +++ b/alembic/versions/b4f8c1d2e3a4_add_registry_version_artifact_hash.py @@ -0,0 +1,35 @@ +"""Add registry version artifact hash + +Revision ID: b4f8c1d2e3a4 +Revises: a3d7c9e8b4f2 +Create Date: 2026-05-23 00:00:00.000000 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "b4f8c1d2e3a4" +down_revision: str | None = "a3d7c9e8b4f2" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "registry_version", + sa.Column("artifact_hash", sa.String(), nullable=True), + ) + op.add_column( + "platform_registry_version", + sa.Column("artifact_hash", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("platform_registry_version", "artifact_hash") + op.drop_column("registry_version", "artifact_hash") diff --git a/frontend/src/client/schemas.gen.ts b/frontend/src/client/schemas.gen.ts index f8550ed32a..6fe241611c 100644 --- a/frontend/src/client/schemas.gen.ts +++ b/frontend/src/client/schemas.gen.ts @@ -17961,9 +17961,9 @@ Attributes: Example: {"tracecat_registry": "2024.12.10.123456"} actions: Maps action name to its source origin. Example: {"core.transform.reshape": "tracecat_registry"} - origin_fingerprints: Optional immutable manifest fingerprints for origins. - New executors use the builtin fingerprint to decide whether their - bundled tracecat_registry package is an exact match for the lock.`, + origin_fingerprints: Optional immutable origin fingerprints. New locks + prefer execution artifact SHA-256 hashes and fall back to manifest + fingerprints when older versions do not have artifact hashes.`, } as const export const $RegistryOAuthSecret = { @@ -29968,6 +29968,17 @@ export const $tracecat__admin__registry__schemas__RegistryVersionRead = { ], title: "Tarball Uri", }, + artifact_hash: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Artifact Hash", + }, created_at: { type: "string", format: "date-time", @@ -30240,6 +30251,17 @@ export const $tracecat__registry__repositories__schemas__RegistryVersionRead = { ], title: "Tarball Uri", }, + artifact_hash: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Artifact Hash", + }, created_at: { type: "string", format: "date-time", diff --git a/frontend/src/client/types.gen.ts b/frontend/src/client/types.gen.ts index 342c9ef340..dfb1e7b0aa 100644 --- a/frontend/src/client/types.gen.ts +++ b/frontend/src/client/types.gen.ts @@ -5526,9 +5526,9 @@ export type RegistryArtifactsBackfillStartResponse = { * Example: {"tracecat_registry": "2024.12.10.123456"} * actions: Maps action name to its source origin. * Example: {"core.transform.reshape": "tracecat_registry"} - * origin_fingerprints: Optional immutable manifest fingerprints for origins. - * New executors use the builtin fingerprint to decide whether their - * bundled tracecat_registry package is an exact match for the lock. + * origin_fingerprints: Optional immutable origin fingerprints. New locks + * prefer execution artifact SHA-256 hashes and fall back to manifest + * fingerprints when older versions do not have artifact hashes. */ export type RegistryLock = { origins: { @@ -8905,6 +8905,7 @@ export type tracecat__admin__registry__schemas__RegistryVersionRead = { version: string commit_sha: string | null tarball_uri: string | null + artifact_hash?: string | null created_at: string is_current?: boolean artifacts_ready?: boolean @@ -8964,6 +8965,7 @@ export type tracecat__registry__repositories__schemas__RegistryVersionRead = { version: string commit_sha: string | null tarball_uri: string | null + artifact_hash?: string | null created_at: string } diff --git a/tests/unit/executor/test_registry_artifact_lookup.py b/tests/unit/executor/test_registry_artifact_lookup.py new file mode 100644 index 0000000000..645996ec65 --- /dev/null +++ b/tests/unit/executor/test_registry_artifact_lookup.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +import uuid +from collections.abc import Sequence +from types import TracebackType +from typing import Any + +import pytest + +from tracecat.exceptions import RegistryValidationError +from tracecat.executor.service import ( + RegistryArtifactsContext, + get_registry_artifacts_for_lock, +) +from tracecat.registry.versions.schemas import ( + RegistryVersionManifest, + registry_manifest_fingerprint, +) + +type ArtifactRow = tuple[str, str, str | None, str | None, dict[str, Any]] + + +class _FakeResult: + def __init__(self, rows: Sequence[ArtifactRow]) -> None: + self._rows = rows + + def all(self) -> Sequence[ArtifactRow]: + return self._rows + + +class _FakeSession: + def __init__(self, rows: Sequence[ArtifactRow]) -> None: + self._rows = rows + + async def execute(self, _statement: object) -> _FakeResult: + return _FakeResult(self._rows) + + +class _FakeSessionManager: + def __init__(self, rows: Sequence[ArtifactRow]) -> None: + self._rows = rows + + async def __aenter__(self) -> _FakeSession: + return _FakeSession(self._rows) + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> None: + pass + + +def _manifest_dict(manifest: RegistryVersionManifest) -> dict[str, Any]: + return manifest.model_dump(mode="json") + + +def _artifact_row( + *, + origin: str, + version: str, + artifact_hash: str | None, + manifest: RegistryVersionManifest, +) -> ArtifactRow: + return ( + origin, + version, + f"s3://bucket/{uuid.uuid4().hex}/site-packages.squashfs", + artifact_hash, + _manifest_dict(manifest), + ) + + +def _patch_artifact_lookup_session( + monkeypatch: pytest.MonkeyPatch, + rows: Sequence[ArtifactRow], +) -> None: + monkeypatch.setattr( + "tracecat.executor.service.get_async_session_bypass_rls_context_manager", + lambda: _FakeSessionManager(rows), + ) + + +@pytest.mark.anyio +async def test_artifact_hash_lock_rejects_current_db_hash_rewrite( + monkeypatch: pytest.MonkeyPatch, +) -> None: + origin = "git+ssh://github.com/example/custom.git" + version = "v1" + locked_hash = "a" * 64 + current_db_hash = "b" * 64 + _patch_artifact_lookup_session( + monkeypatch, + [ + _artifact_row( + origin=origin, + version=version, + artifact_hash=current_db_hash, + manifest=RegistryVersionManifest(), + ) + ], + ) + + with pytest.raises( + RegistryValidationError, + match="Locked registry artifact fingerprint mismatch", + ): + await get_registry_artifacts_for_lock( + origins={origin: version}, + organization_id=uuid.uuid4(), + origin_fingerprints={origin: locked_hash}, + ) + + +@pytest.mark.anyio +async def test_artifact_hash_lock_preserves_matching_locked_hash( + monkeypatch: pytest.MonkeyPatch, +) -> None: + origin = "git+ssh://github.com/example/custom.git" + version = "v1" + locked_hash = "a" * 64 + _patch_artifact_lookup_session( + monkeypatch, + [ + _artifact_row( + origin=origin, + version=version, + artifact_hash=locked_hash, + manifest=RegistryVersionManifest(), + ) + ], + ) + + artifacts = await get_registry_artifacts_for_lock( + origins={origin: version}, + organization_id=uuid.uuid4(), + origin_fingerprints={origin: locked_hash}, + ) + + assert artifacts == [ + RegistryArtifactsContext( + origin=origin, + version=version, + artifact_uri=artifacts[0].artifact_uri, + artifact_hash=locked_hash, + ) + ] + + +@pytest.mark.anyio +async def test_manifest_fingerprint_lock_allows_current_artifact_hash( + monkeypatch: pytest.MonkeyPatch, +) -> None: + origin = "git+ssh://github.com/example/custom.git" + version = "v1" + artifact_hash = "b" * 64 + manifest = RegistryVersionManifest() + manifest_fingerprint = registry_manifest_fingerprint(manifest) + _patch_artifact_lookup_session( + monkeypatch, + [ + _artifact_row( + origin=origin, + version=version, + artifact_hash=artifact_hash, + manifest=manifest, + ) + ], + ) + + artifacts = await get_registry_artifacts_for_lock( + origins={origin: version}, + organization_id=uuid.uuid4(), + origin_fingerprints={origin: manifest_fingerprint}, + ) + + assert artifacts == [ + RegistryArtifactsContext( + origin=origin, + version=version, + artifact_uri=artifacts[0].artifact_uri, + artifact_hash=artifact_hash, + ) + ] + + +@pytest.mark.anyio +async def test_lookup_without_lock_fingerprint_uses_current_artifact_hash( + monkeypatch: pytest.MonkeyPatch, +) -> None: + origin = "git+ssh://github.com/example/custom.git" + version = "v1" + artifact_hash = "c" * 64 + _patch_artifact_lookup_session( + monkeypatch, + [ + _artifact_row( + origin=origin, + version=version, + artifact_hash=artifact_hash, + manifest=RegistryVersionManifest(), + ) + ], + ) + + artifacts = await get_registry_artifacts_for_lock( + origins={origin: version}, + organization_id=uuid.uuid4(), + ) + + assert artifacts == [ + RegistryArtifactsContext( + origin=origin, + version=version, + artifact_uri=artifacts[0].artifact_uri, + artifact_hash=artifact_hash, + ) + ] diff --git a/tests/unit/executor/test_registry_helpers.py b/tests/unit/executor/test_registry_helpers.py index c427e18c00..0ce4503164 100644 --- a/tests/unit/executor/test_registry_helpers.py +++ b/tests/unit/executor/test_registry_helpers.py @@ -44,6 +44,7 @@ def test_sort_registry_artifact_uris_orders_builtin_first() -> None: origin="tracecat_registry", version="v1", artifact_uri="s3://bucket/builtin.tar.gz", + artifact_hash="a" * 64, ), RegistryArtifactsContext( origin="git+ssh://github.com/example/a.git", @@ -53,7 +54,7 @@ def test_sort_registry_artifact_uris_orders_builtin_first() -> None: ] assert sort_registry_artifact_uris(artifacts) == [ - "s3://bucket/builtin.tar.gz", + f"s3://bucket/builtin.tar.gz#sha256={'a' * 64}", "s3://bucket/a.tar.gz", "s3://bucket/b.tar.gz", ] @@ -129,19 +130,58 @@ async def fail_lookup(*_args, **_kwargs): ] +@pytest.mark.anyio +async def test_get_registry_artifact_uris_uses_bundled_builtin_on_artifact_hash_match( + test_role: Role, monkeypatch: pytest.MonkeyPatch +) -> None: + current_version = "1.2.3" + artifact_hash = "a" * 64 + input_data = cast( + RunActionInput, + SimpleNamespace( + registry_lock=RegistryLock( + origins={DEFAULT_REGISTRY_ORIGIN: current_version}, + actions={}, + origin_fingerprints={DEFAULT_REGISTRY_ORIGIN: artifact_hash}, + ) + ), + ) + + async def fail_lookup(*_args, **_kwargs): + pytest.fail("matching builtin registry should not query artifact storage") + + monkeypatch.setattr( + "tracecat.executor.backends.registry_helpers.tracecat_registry.__version__", + current_version, + ) + monkeypatch.setattr( + "tracecat.executor.backends.registry_helpers.load_prebuilt_builtin_registry_artifact_metadata", + lambda **_kwargs: SimpleNamespace(artifact_hash=artifact_hash), + ) + monkeypatch.setattr( + "tracecat.executor.backends.registry_helpers.get_registry_artifacts_for_lock", + fail_lookup, + ) + + assert await get_registry_artifact_uris(input_data, test_role) == [ + bundled_builtin_registry_uri(current_version) + ] + + @pytest.mark.anyio async def test_get_registry_artifact_uris_looks_up_builtin_on_fingerprint_mismatch( test_role: Role, monkeypatch: pytest.MonkeyPatch ) -> None: current_version = "1.2.3" artifact_uri = "s3://bucket/builtin/site-packages.squashfs" + locked_artifact_hash = "b" * 64 input_data = cast( RunActionInput, SimpleNamespace( registry_lock=RegistryLock( origins={DEFAULT_REGISTRY_ORIGIN: current_version}, actions={}, - origin_fingerprints={DEFAULT_REGISTRY_ORIGIN: "different"}, + origin_fingerprints={DEFAULT_REGISTRY_ORIGIN: locked_artifact_hash}, ) ), ) @@ -149,14 +189,18 @@ async def test_get_registry_artifact_uris_looks_up_builtin_on_fingerprint_mismat async def get_artifacts( origins: dict[str, str], organization_id: uuid.UUID, + *, + origin_fingerprints: dict[str, str] | None = None, ) -> list[RegistryArtifactsContext]: assert origins == {DEFAULT_REGISTRY_ORIGIN: current_version} assert organization_id == test_role.organization_id + assert origin_fingerprints == {DEFAULT_REGISTRY_ORIGIN: locked_artifact_hash} return [ RegistryArtifactsContext( origin=DEFAULT_REGISTRY_ORIGIN, version=current_version, artifact_uri=artifact_uri, + artifact_hash=locked_artifact_hash, ) ] @@ -173,7 +217,9 @@ async def get_artifacts( get_artifacts, ) - assert await get_registry_artifact_uris(input_data, test_role) == [artifact_uri] + assert await get_registry_artifact_uris(input_data, test_role) == [ + f"{artifact_uri}#sha256={locked_artifact_hash}" + ] @pytest.mark.anyio @@ -196,9 +242,11 @@ async def test_get_registry_artifact_uris_looks_up_builtin_when_manifest_missing async def get_artifacts( origins: dict[str, str], organization_id: uuid.UUID, + origin_fingerprints: dict[str, str] | None = None, ) -> list[RegistryArtifactsContext]: assert origins == {DEFAULT_REGISTRY_ORIGIN: current_version} assert organization_id == test_role.organization_id + assert origin_fingerprints == {DEFAULT_REGISTRY_ORIGIN: "expected"} return [ RegistryArtifactsContext( origin=DEFAULT_REGISTRY_ORIGIN, @@ -244,9 +292,12 @@ async def test_get_registry_artifact_uris_looks_up_only_non_current_origins( async def get_artifacts( origins: dict[str, str], organization_id: uuid.UUID, + *, + origin_fingerprints: dict[str, str] | None = None, ) -> list[RegistryArtifactsContext]: assert origins == {custom_origin: "abc123"} assert organization_id == test_role.organization_id + assert origin_fingerprints is None return [ RegistryArtifactsContext( origin=custom_origin, diff --git a/tests/unit/test_registry_artifacts.py b/tests/unit/test_registry_artifacts.py index 6954ef57ae..ff7d503cda 100644 --- a/tests/unit/test_registry_artifacts.py +++ b/tests/unit/test_registry_artifacts.py @@ -20,6 +20,8 @@ TarballArtifact, bundled_builtin_registry_uri, compute_registry_artifact_cache_key, + registry_artifact_ref, + split_registry_artifact_ref, ) from tracecat.registry.artifact_keys import parse_s3_uri @@ -82,6 +84,15 @@ def test_compute_registry_artifact_cache_key_case_sensitive(self): assert key1 != key2 + def test_compute_registry_artifact_cache_key_includes_hash_fragment(self): + uri = "s3://bucket/path/site-packages.squashfs" + ref = registry_artifact_ref(uri, "a" * 64) + + assert compute_registry_artifact_cache_key(ref) != ( + compute_registry_artifact_cache_key(uri) + ) + assert split_registry_artifact_ref(ref) == (uri, "a" * 64) + def test_compute_registry_artifact_cache_key_empty(self): """Test that empty URI returns the base cache key.""" assert compute_registry_artifact_cache_key("") == "base" @@ -104,7 +115,9 @@ async def mock_download_file_to_path( key: str, bucket: str, output_path: Path, + expected_sha256: str | None = None, ) -> None: + assert expected_sha256 is None output_path.write_bytes(b"squashfs") with patch( @@ -121,6 +134,68 @@ async def mock_download_file_to_path( assert await_args.kwargs["bucket"] == "bucket" assert output_path.read_bytes() == b"squashfs" + @pytest.mark.anyio + async def test_download_artifact_verifies_expected_hash(self, temp_cache_dir): + cache = RegistryArtifactCache(temp_cache_dir) + artifact = SquashfsArtifact( + uri="s3://bucket/path/site-packages.squashfs", + cache_key="download-test", + expected_hash="a" * 64, + ) + ctx = cache._context_for(artifact.cache_key) + output_path = temp_cache_dir / "artifact.squashfs" + + async def mock_download_file_to_path( + *, + key: str, + bucket: str, + output_path: Path, + expected_sha256: str | None = None, + ) -> None: + assert expected_sha256 == "a" * 64 + output_path.write_bytes(b"squashfs") + + with patch( + "tracecat.executor.registry_artifacts.blob.download_file_to_path", + new_callable=AsyncMock, + side_effect=mock_download_file_to_path, + ): + await artifact.download(ctx, output_path) + + assert output_path.read_bytes() == b"squashfs" + + @pytest.mark.anyio + async def test_download_tarball_artifact_verifies_expected_hash( + self, temp_cache_dir + ): + cache = RegistryArtifactCache(temp_cache_dir) + artifact = TarballArtifact( + uri="s3://bucket/path/site-packages.tar.gz", + cache_key="download-test", + expected_hash="a" * 64, + ) + ctx = cache._context_for(artifact.cache_key) + output_path = temp_cache_dir / "artifact.tar.gz" + + async def mock_download_file_to_path( + *, + key: str, + bucket: str, + output_path: Path, + expected_sha256: str | None = None, + ) -> None: + assert expected_sha256 == "a" * 64 + output_path.write_bytes(b"tarball") + + with patch( + "tracecat.executor.registry_artifacts.blob.download_file_to_path", + new_callable=AsyncMock, + side_effect=mock_download_file_to_path, + ): + await artifact.download(ctx, output_path) + + assert output_path.read_bytes() == b"tarball" + @pytest.mark.anyio async def test_ensure_environment_uses_bundled_current_builtin( self, temp_cache_dir, monkeypatch: pytest.MonkeyPatch @@ -320,6 +395,56 @@ async def test_artifact_candidates_direct_squashfs_include_gzip_fallback( ] can_try_squashfs.assert_not_called() + @pytest.mark.anyio + async def test_artifact_candidates_direct_squashfs_hash_blocks_gzip_fallback( + self, temp_cache_dir + ): + """Hash-locked SquashFS artifacts must not fall back to unverified tarballs.""" + cache = RegistryArtifactCache(temp_cache_dir) + + cache_key = compute_registry_artifact_cache_key( + "s3://bucket/path/site-packages.squashfs" + ) + ctx = cache._context_for(cache_key) + candidates = await cache._artifact_candidates( + ctx, + "s3://bucket/path/site-packages.squashfs", + expected_hash="a" * 64, + ) + + assert len(candidates) == 1 + assert isinstance(candidates[0], SquashfsArtifact) + assert candidates[0].expected_hash == "a" * 64 + + @pytest.mark.anyio + async def test_artifact_candidates_tarball_hash_blocks_squashfs_sidecar( + self, temp_cache_dir + ): + """Hash-locked tarballs must not prefer an unverified SquashFS sidecar.""" + cache = RegistryArtifactCache(temp_cache_dir) + + with ( + patch( + "tracecat.executor.registry_artifacts.blob.file_exists", + new_callable=AsyncMock, + ) as file_exists, + patch.object(cache, "_can_try_squashfs", return_value=True), + ): + cache_key = compute_registry_artifact_cache_key( + "s3://bucket/path/site-packages.tar.gz" + ) + ctx = cache._context_for(cache_key) + candidates = await cache._artifact_candidates( + ctx, + "s3://bucket/path/site-packages.tar.gz", + expected_hash="a" * 64, + ) + + assert len(candidates) == 1 + assert isinstance(candidates[0], TarballArtifact) + assert candidates[0].expected_hash == "a" * 64 + file_exists.assert_not_awaited() + @pytest.mark.anyio async def test_artifact_candidates_fall_back_to_gzip(self, temp_cache_dir): """Test that gzip tarballs are used when no sidecar exists.""" diff --git a/tests/unit/test_registry_lock_service.py b/tests/unit/test_registry_lock_service.py index 45bd36bb40..94f884c08b 100644 --- a/tests/unit/test_registry_lock_service.py +++ b/tests/unit/test_registry_lock_service.py @@ -250,6 +250,36 @@ async def test_resolve_lock_queries_platform_registry( ) +@pytest.mark.anyio +async def test_resolve_lock_prefers_artifact_hash_for_origin_fingerprint( + svc_role: Role, + session: AsyncSession, +) -> None: + origin = "hashed_platform_registry" + platform_repo = PlatformRegistryRepository(origin=origin) + session.add(platform_repo) + await session.flush() + + platform_version = PlatformRegistryVersion( + repository_id=platform_repo.id, + version="1.0.0", + manifest=_make_manifest(["platform.hashed_action"]), + tarball_uri="s3://platform/v1.tar.gz", + artifact_hash="a" * 64, + ) + session.add(platform_version) + await session.flush() + + platform_repo.current_version_id = platform_version.id + session.add(platform_repo) + await session.commit() + + service = RegistryLockService(session, role=svc_role) + lock = await service.resolve_lock_with_bindings({"platform.hashed_action"}) + + assert lock.origin_fingerprints[origin] == "a" * 64 + + @pytest.mark.anyio async def test_resolve_lock_org_overrides_platform( svc_role: Role, diff --git a/tests/unit/test_registry_platform_startup.py b/tests/unit/test_registry_platform_startup.py index e22f124e43..b049263d47 100644 --- a/tests/unit/test_registry_platform_startup.py +++ b/tests/unit/test_registry_platform_startup.py @@ -517,12 +517,16 @@ async def test_promote_after_artifact_build_promotes_when_current_matches_guard( session, target_version="2.0.0", version_id=new_version.id, - artifact_uri="s3://test/v2.tar.gz", expected_current_version_id=old_version.id, + artifact_uri="s3://test/v2-built.squashfs", + artifact_hash="b" * 64, ) await session.refresh(repo) + await session.refresh(new_version) assert repo.current_version_id == new_version.id + assert new_version.tarball_uri == "s3://test/v2-built.squashfs" + assert new_version.artifact_hash == "b" * 64 @pytest.mark.anyio @@ -559,12 +563,14 @@ async def test_promote_after_artifact_build_updates_stored_artifact_uri( version_id=pending_version.id, artifact_uri=artifact_uri, expected_current_version_id=old_version.id, + artifact_hash="b" * 64, ) await session.refresh(repo) await session.refresh(pending_version) assert repo.current_version_id == pending_version.id assert pending_version.tarball_uri == artifact_uri + assert pending_version.artifact_hash == "b" * 64 @pytest.mark.anyio @@ -600,10 +606,57 @@ async def test_promote_after_artifact_build_promotes_collision_version( version_id=collision_version.id, artifact_uri="s3://test/collision.tar.gz", expected_current_version_id=old_version.id, + artifact_hash="c" * 64, ) await session.refresh(repo) + await session.refresh(collision_version) assert repo.current_version_id == collision_version.id + assert collision_version.artifact_hash == "c" * 64 + + +@pytest.mark.anyio +async def test_promote_after_artifact_build_preserves_existing_hash_when_reusing_object( + session: AsyncSession, +) -> None: + from tracecat.registry.sync.jobs import ( + _promote_platform_registry_version_after_artifact_build, + ) + + repo = await _get_or_create_platform_repo(session) + old_version = PlatformRegistryVersion( + repository_id=repo.id, + version="1.0.0", + manifest=_make_manifest(["test.action_v1"]), + tarball_uri="s3://test/v1.tar.gz", + ) + new_version = PlatformRegistryVersion( + repository_id=repo.id, + version="2.0.0", + manifest=_make_manifest(["test.action_v2"]), + tarball_uri="s3://test/v2.tar.gz", + artifact_hash="a" * 64, + ) + session.add_all([old_version, new_version]) + await session.flush() + repo.current_version_id = old_version.id + session.add(repo) + await session.commit() + + await _promote_platform_registry_version_after_artifact_build( + session, + target_version="2.0.0", + version_id=new_version.id, + expected_current_version_id=old_version.id, + artifact_uri="s3://test/v2-built.squashfs", + artifact_hash=None, + ) + + await session.refresh(repo) + await session.refresh(new_version) + assert repo.current_version_id == new_version.id + assert new_version.tarball_uri == "s3://test/v2-built.squashfs" + assert new_version.artifact_hash == "a" * 64 @pytest.mark.anyio @@ -643,8 +696,9 @@ async def test_promote_after_artifact_build_skips_when_current_changed( session, target_version="2.0.0", version_id=pending_version.id, - artifact_uri="s3://test/v2.tar.gz", expected_current_version_id=old_version.id, + artifact_uri="s3://test/v2-built.squashfs", + artifact_hash="b" * 64, ) await session.refresh(repo) diff --git a/tests/unit/test_registry_sync_artifact.py b/tests/unit/test_registry_sync_artifact.py index 5ca6c1faa6..07b573f05b 100644 --- a/tests/unit/test_registry_sync_artifact.py +++ b/tests/unit/test_registry_sync_artifact.py @@ -176,6 +176,32 @@ def test_create_squashfs_image_makes_mount_root_traversable( def fake_subprocess_run(cmd: list[str]) -> subprocess.CompletedProcess[str]: staging_dir = Path(cmd[1]) assert staging_dir.stat().st_mode & 0o777 == 0o755 + assert cmd == [ + "/usr/bin/mksquashfs", + str(staging_dir), + str(image_path), + "-noappend", + "-comp", + "gzip", + "-Xcompression-level", + "6", + "-no-xattrs", + "-all-root", + "-root-mode", + "755", + "-reproducible", + "-mkfs-time", + "0", + "-all-time", + "0", + "-root-time", + "0", + "-no-hardlinks", + "-processors", + "2", + "-mem", + "256M", + ] assert cmd[-4:] == ["-processors", "2", "-mem", "256M"] image_path.write_bytes(b"squashfs") return subprocess.CompletedProcess(cmd, 0, "", "") @@ -188,6 +214,51 @@ def fake_subprocess_run(cmd: list[str]) -> subprocess.CompletedProcess[str]: ) +def test_create_squashfs_image_stages_entries_in_deterministic_order( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Stage top-level entries by arcname for reproducible SquashFS output.""" + source = tmp_path / "source" + source.mkdir() + for name in ("b.py", "a.py", "c.py"): + (source / name).write_text(f"{name}\n") + image_path = tmp_path / "site-packages.squashfs" + + monkeypatch.setattr( + artifact.config, "TRACECAT__REGISTRY_SYNC_SQUASHFS_ENABLED", True + ) + monkeypatch.setattr(artifact.shutil, "which", lambda _name: "/usr/bin/mksquashfs") + + staged_arcnames: list[str] = [] + + def fake_copy_squashfs_entry( + path: Path, + dest: Path, + *, + preserve_symlinks: bool, # noqa: ARG001 + ) -> None: + staged_arcnames.append(dest.name) + dest.write_text(path.read_text()) + + def fake_subprocess_run(cmd: list[str]) -> subprocess.CompletedProcess[str]: + image_path.write_bytes(b"squashfs") + return subprocess.CompletedProcess(cmd, 0, "", "") + + monkeypatch.setattr(artifact, "_copy_squashfs_entry", fake_copy_squashfs_entry) + monkeypatch.setattr(artifact, "subprocess_run", fake_subprocess_run) + + assert artifact._create_squashfs_image( + image_path, + [ + (source / "b.py", "b.py"), + (source / "a.py", "a.py"), + (source / "c.py", "c.py"), + ], + ) + assert staged_arcnames == ["a.py", "b.py", "c.py"] + + def test_create_squashfs_image_makes_restrictive_files_readable( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/unit/test_registry_sync_base_service.py b/tests/unit/test_registry_sync_base_service.py index b3af1cb3d9..67b8f3fc1b 100644 --- a/tests/unit/test_registry_sync_base_service.py +++ b/tests/unit/test_registry_sync_base_service.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import uuid from pathlib import Path from types import SimpleNamespace @@ -30,7 +31,11 @@ from tracecat.registry.sync.artifact import RegistryArtifactBuildResult from tracecat.registry.sync.base_service import ArtifactsBuildResult from tracecat.registry.sync.platform_service import PlatformRegistrySyncService -from tracecat.registry.sync.prebuilt import write_prebuilt_registry_manifest +from tracecat.registry.sync.prebuilt import ( + PrebuiltRegistryArtifactMetadata, + write_prebuilt_registry_artifact_metadata, + write_prebuilt_registry_manifest, +) from tracecat.registry.sync.runner import RegistrySyncValidationError from tracecat.registry.sync.service import RegistrySyncError, RegistrySyncService from tracecat.registry.versions.schemas import RegistryVersionManifest @@ -139,7 +144,8 @@ async def fake_build_and_upload_artifacts( ) -> ArtifactsBuildResult: del origin, commit_sha, ssh_env return ArtifactsBuildResult( - artifact_uri=f"s3://test-bucket/{version_string}/site-packages.squashfs" + artifact_uri=f"s3://test-bucket/{version_string}/site-packages.squashfs", + artifact_hash="a" * 64, ) mocker.patch.object( @@ -196,6 +202,25 @@ async def test_platform_builtin_sync_reuses_existing_artifact_objects( "tracecat.registry.sync.base_service.upload_squashfs_venv", mocker.AsyncMock(), ) + existing_artifact = b"existing-squashfs-artifact" + + async def fake_download_file_to_path( + *, + key: str, + bucket: str, + output_path: Path, + ) -> int: + assert key == ( + "platform/tarball-venvs/tracecat_registry/1.2.3/site-packages.squashfs" + ) + assert bucket == "registry" + output_path.write_bytes(existing_artifact) + return len(existing_artifact) + + download_file_to_path = mocker.patch( + "tracecat.registry.sync.base_service.blob.download_file_to_path", + mocker.AsyncMock(side_effect=fake_download_file_to_path), + ) service = PlatformRegistrySyncService(mocker.Mock(spec=AsyncSession)) @@ -209,11 +234,13 @@ async def test_platform_builtin_sync_reuses_existing_artifact_objects( "s3://registry/platform/tarball-venvs/tracecat_registry/1.2.3/" "site-packages.squashfs" ) + assert result.artifact_hash == hashlib.sha256(existing_artifact).hexdigest() ensure_bucket_exists.assert_awaited_once_with("registry") file_exists.assert_awaited_once_with( key="platform/tarball-venvs/tracecat_registry/1.2.3/site-packages.squashfs", bucket="registry", ) + download_file_to_path.assert_awaited_once() build_builtin_registry_artifact.assert_not_awaited() upload_squashfs_venv.assert_not_awaited() @@ -258,6 +285,7 @@ async def test_platform_builtin_sync_uploads_squashfs_artifact( ) assert result.artifact_uri.endswith("/1.2.3/site-packages.squashfs") + assert result.artifact_hash == artifact_result.content_hash build_builtin_registry_artifact.assert_awaited_once() upload_squashfs_venv.assert_awaited_once_with( squashfs_path=artifact_result.squashfs_path, @@ -293,6 +321,13 @@ async def test_platform_builtin_sync_uses_prebuilt_manifest_without_discovery( version="1.2.3", ) write_prebuilt_registry_manifest(artifact_dir=artifact_dir, manifest=manifest) + write_prebuilt_registry_artifact_metadata( + artifact_dir=artifact_dir, + metadata=PrebuiltRegistryArtifactMetadata( + artifact_hash="b" * 64, + artifact_size_bytes=123, + ), + ) fetch_actions_from_subprocess = mocker.patch( "tracecat.registry.sync.base_service.fetch_actions_from_subprocess", @@ -316,6 +351,8 @@ async def test_platform_builtin_sync_uses_prebuilt_manifest_without_discovery( assert result.num_actions == 1 assert result.actions[0].default_title == "Prebuilt title" assert result.artifact_uri.endswith("/1.2.3/site-packages.squashfs") + assert result.artifact_hash == "b" * 64 + assert result.version.artifact_hash == "b" * 64 assert RegistryVersionManifest.model_validate(result.version.manifest) == manifest fetch_actions_from_subprocess.assert_not_awaited() @@ -430,6 +467,13 @@ async def test_platform_builtin_sync_can_create_pending_version( version="1.2.3", ) write_prebuilt_registry_manifest(artifact_dir=artifact_dir, manifest=manifest) + write_prebuilt_registry_artifact_metadata( + artifact_dir=artifact_dir, + metadata=PrebuiltRegistryArtifactMetadata( + artifact_hash="c" * 64, + artifact_size_bytes=123, + ), + ) mocker.patch( "tracecat.registry.sync.base_service.fetch_actions_from_subprocess", @@ -452,6 +496,7 @@ async def test_platform_builtin_sync_can_create_pending_version( ) assert result.version.version == "1.2.3" + assert result.version.artifact_hash == "c" * 64 assert repo.current_version_id is None diff --git a/tests/unit/test_registry_sync_runner.py b/tests/unit/test_registry_sync_runner.py index f0861d19b5..653765131d 100644 --- a/tests/unit/test_registry_sync_runner.py +++ b/tests/unit/test_registry_sync_runner.py @@ -19,7 +19,12 @@ from tracecat.registry.artifact_keys import get_artifact_local_dir from tracecat.registry.constants import DEFAULT_REGISTRY_ORIGIN from tracecat.registry.sync.artifact import RegistryArtifactBuildResult -from tracecat.registry.sync.prebuilt import write_prebuilt_registry_manifest +from tracecat.registry.sync.prebuilt import ( + PrebuiltRegistryArtifactMetadata, + load_prebuilt_registry_artifact_metadata, + write_prebuilt_registry_artifact_metadata, + write_prebuilt_registry_manifest, +) from tracecat.registry.sync.runner import ( RegistrySyncRunner, RegistrySyncValidationError, @@ -79,6 +84,29 @@ def test_write_prebuilt_registry_manifest_is_compact(tmp_path: Path) -> None: assert RegistryVersionManifest.model_validate_json(manifest_text) == manifest +def test_write_prebuilt_registry_artifact_metadata_is_compact(tmp_path: Path) -> None: + metadata = PrebuiltRegistryArtifactMetadata( + artifact_hash="a" * 64, + artifact_size_bytes=123, + ) + + metadata_path = write_prebuilt_registry_artifact_metadata( + artifact_dir=tmp_path, + metadata=metadata, + ) + + metadata_text = metadata_path.read_text() + assert "\n" not in metadata_text + assert " " not in metadata_text + assert ( + load_prebuilt_registry_artifact_metadata( + root=tmp_path.parent, + prefix=tmp_path.name, + ) + == metadata + ) + + def test_registry_sync_request_ignores_legacy_ssh_key() -> None: """Legacy SSH keys are accepted for rollout compatibility but not serialized.""" payload = { @@ -159,6 +187,7 @@ async def test_runner_passes_resolved_commit_sha_to_discovery( ) upload_tarball.assert_awaited_once() assert result.commit_sha == "resolved-sha" + assert result.artifact_hash == artifact_result.content_hash @pytest.mark.anyio @@ -359,6 +388,7 @@ async def test_runner_falls_back_to_discovery_when_prebuilt_manifest_is_invalid( result = await runner.run(request) assert result.tarball_uri.endswith("/site-packages.squashfs") + assert result.artifact_hash == artifact_result.content_hash build_tarball_venv.assert_awaited_once() discover_actions.assert_awaited_once_with( repository_id=repository_id, @@ -586,6 +616,7 @@ async def test_runner_uses_prebuilt_manifest_without_discovery( result = await runner.run(request) assert result.tarball_uri.endswith("/site-packages.squashfs") + assert result.artifact_hash == artifact_result.content_hash assert len(result.actions) == 1 assert result.actions[0].default_title == "Prebuilt title" build_tarball_venv.assert_awaited_once() diff --git a/tests/unit/test_registry_version_schemas.py b/tests/unit/test_registry_version_schemas.py new file mode 100644 index 0000000000..dec8805b28 --- /dev/null +++ b/tests/unit/test_registry_version_schemas.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import uuid + +import pytest +from pydantic import ValidationError + +from tracecat.registry.versions.schemas import ( + RegistryVersionCreate, + RegistryVersionManifest, +) + + +def _registry_version_create(*, artifact_hash: str | None) -> RegistryVersionCreate: + return RegistryVersionCreate( + repository_id=uuid.uuid4(), + version="1.0.0", + manifest=RegistryVersionManifest(), + tarball_uri="s3://registry-artifacts/path/site-packages.squashfs", + artifact_hash=artifact_hash, + ) + + +def test_registry_version_create_accepts_sha256_artifact_hash() -> None: + version = _registry_version_create(artifact_hash="a" * 64) + + assert version.artifact_hash == "a" * 64 + + +@pytest.mark.parametrize( + "artifact_hash", + [ + "a" * 63, + "a" * 65, + "g" * 64, + "not-a-sha256", + ], +) +def test_registry_version_create_rejects_invalid_artifact_hash( + artifact_hash: str, +) -> None: + with pytest.raises(ValidationError): + _registry_version_create(artifact_hash=artifact_hash) diff --git a/tracecat/admin/registry/schemas.py b/tracecat/admin/registry/schemas.py index 1a0827936b..21238a4930 100644 --- a/tracecat/admin/registry/schemas.py +++ b/tracecat/admin/registry/schemas.py @@ -54,6 +54,7 @@ class RegistryVersionRead(BaseModel): version: str commit_sha: str | None tarball_uri: str | None + artifact_hash: str | None = None created_at: datetime is_current: bool = False artifacts_ready: bool = False diff --git a/tracecat/admin/registry/service.py b/tracecat/admin/registry/service.py index d7168579e1..95a2baa669 100644 --- a/tracecat/admin/registry/service.py +++ b/tracecat/admin/registry/service.py @@ -487,6 +487,7 @@ async def list_versions( version=version.version, commit_sha=version.commit_sha, tarball_uri=version.tarball_uri, + artifact_hash=version.artifact_hash, created_at=version.created_at, is_current=is_current, artifacts_ready=artifacts_ready, diff --git a/tracecat/config.py b/tracecat/config.py index 9e6314ccf8..00bd1043e0 100644 --- a/tracecat/config.py +++ b/tracecat/config.py @@ -1166,8 +1166,10 @@ def _parse_auth_types() -> set[AuthType]: ).lower() in ("true", "1") """Build SquashFS sidecars for registry tarball venvs when mksquashfs is available.""" +_DEFAULT_REGISTRY_SYNC_SQUASHFS_PROCESSORS = os.cpu_count() or 1 TRACECAT__REGISTRY_SYNC_SQUASHFS_PROCESSORS = int( - os.environ.get("TRACECAT__REGISTRY_SYNC_SQUASHFS_PROCESSORS") or 1 + os.environ.get("TRACECAT__REGISTRY_SYNC_SQUASHFS_PROCESSORS") + or _DEFAULT_REGISTRY_SYNC_SQUASHFS_PROCESSORS ) """Number of processors mksquashfs may use for registry SquashFS sidecar builds.""" diff --git a/tracecat/db/models.py b/tracecat/db/models.py index d448df27f8..74339cd246 100644 --- a/tracecat/db/models.py +++ b/tracecat/db/models.py @@ -1586,6 +1586,11 @@ class BaseRegistryVersion(Base): nullable=False, doc="S3 URI to the compressed tarball venv for action execution", ) + artifact_hash: Mapped[str | None] = mapped_column( + String, + nullable=True, + doc="SHA-256 content hash of the registry execution artifact", + ) class RegistryVersion(OrganizationModel, BaseRegistryVersion): diff --git a/tracecat/executor/backends/registry_helpers.py b/tracecat/executor/backends/registry_helpers.py index 0243ae7383..94175f9cda 100644 --- a/tracecat/executor/backends/registry_helpers.py +++ b/tracecat/executor/backends/registry_helpers.py @@ -7,7 +7,10 @@ from tracecat import config from tracecat.auth.types import Role from tracecat.dsl.schemas import RunActionInput -from tracecat.executor.registry_artifacts import bundled_builtin_registry_uri +from tracecat.executor.registry_artifacts import ( + bundled_builtin_registry_uri, + registry_artifact_ref, +) from tracecat.executor.service import ( RegistryArtifactsContext, get_registry_artifacts_for_lock, @@ -17,7 +20,10 @@ DEFAULT_REGISTRY_ORIGIN, PLATFORM_REGISTRY_NAMESPACE, ) -from tracecat.registry.sync.prebuilt import load_prebuilt_builtin_registry_manifest +from tracecat.registry.sync.prebuilt import ( + load_prebuilt_builtin_registry_artifact_metadata, + load_prebuilt_builtin_registry_manifest, +) from tracecat.registry.versions.schemas import registry_manifest_fingerprint @@ -53,6 +59,7 @@ async def get_registry_artifact_uris( artifacts = await get_registry_artifacts_for_lock( origins, role.organization_id, + origin_fingerprints=getattr(input.registry_lock, "origin_fingerprints", None), ) return artifact_uris + sort_registry_artifact_uris(artifacts) @@ -67,6 +74,18 @@ def _bundled_builtin_matches_lock(registry_lock: object, version: str) -> bool: if expected is None: return True + local_fingerprints: set[str] = set() + + artifact_metadata = load_prebuilt_builtin_registry_artifact_metadata( + origin=DEFAULT_REGISTRY_ORIGIN, + target_version=version, + storage_namespace=PLATFORM_REGISTRY_NAMESPACE, + ) + if artifact_metadata is not None: + local_fingerprints.add(artifact_metadata.artifact_hash) + if expected == artifact_metadata.artifact_hash: + return True + manifest = load_prebuilt_builtin_registry_manifest( origin=DEFAULT_REGISTRY_ORIGIN, target_version=version, @@ -79,13 +98,13 @@ def _bundled_builtin_matches_lock(registry_lock: object, version: str) -> bool: ) return False - actual = registry_manifest_fingerprint(manifest) - if actual != expected: + local_fingerprints.add(registry_manifest_fingerprint(manifest)) + if expected not in local_fingerprints: logger.info( "Bundled builtin registry fingerprint mismatch; using artifact lookup", registry_version=version, expected_fingerprint=expected, - actual_fingerprint=actual, + local_fingerprints=sorted(local_fingerprints), ) return False @@ -102,10 +121,14 @@ def sort_registry_artifact_uris( for artifact in artifacts: if not artifact.artifact_uri: continue + artifact_ref = registry_artifact_ref( + artifact.artifact_uri, + artifact.artifact_hash, + ) if artifact.origin == DEFAULT_REGISTRY_ORIGIN: - builtin_uris.append(artifact.artifact_uri) + builtin_uris.append(artifact_ref) else: - other_uris.append((artifact.origin, artifact.artifact_uri)) + other_uris.append((artifact.origin, artifact_ref)) other_uris.sort(key=lambda item: item[0]) return builtin_uris + [uri for _, uri in other_uris] diff --git a/tracecat/executor/registry_artifacts.py b/tracecat/executor/registry_artifacts.py index 2a82a28490..2a214f23e4 100644 --- a/tracecat/executor/registry_artifacts.py +++ b/tracecat/executor/registry_artifacts.py @@ -10,7 +10,7 @@ import tarfile import time from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import StrEnum from pathlib import Path @@ -43,6 +43,9 @@ class RegistryArtifactFormat(StrEnum): BUNDLED_BUILTIN_REGISTRY_URI_PREFIX = f"tracecat-builtin://{DEFAULT_REGISTRY_ORIGIN}/" """Pseudo-URI for the builtin registry already installed in the executor image.""" +ARTIFACT_SHA256_FRAGMENT_PREFIX = "#sha256=" +"""URI fragment used to carry expected registry artifact content hashes.""" + @dataclass(frozen=True, slots=True) class RegistryArtifactPaths: @@ -87,6 +90,7 @@ class RegistryArtifact(ABC): uri: str cache_key: str + expected_hash: str | None = field(default=None, kw_only=True) @property @abstractmethod @@ -198,7 +202,11 @@ async def download( temp_image = self._temp_path(ctx, ".squashfs") try: download_start = time.monotonic() - await _download_s3_artifact(self.uri, temp_image) + await _download_s3_artifact( + self.uri, + temp_image, + expected_sha256=self.expected_hash, + ) try: temp_image.rename(image_path) except OSError: @@ -428,7 +436,11 @@ async def download( ctx: RegistryArtifactMaterializationContext, output_path: Path, ) -> None: - await _download_s3_artifact(self.uri, output_path) + await _download_s3_artifact( + self.uri, + output_path, + expected_sha256=self.expected_hash, + ) async def extract(self, tarball_path: Path, target_dir: Path) -> None: """Extract a supported registry tarball to target directory.""" @@ -449,7 +461,12 @@ def _do_extract() -> None: ) -async def _download_s3_artifact(artifact_uri: str, output_path: Path) -> None: +async def _download_s3_artifact( + artifact_uri: str, + output_path: Path, + *, + expected_sha256: str | None = None, +) -> None: """Download an S3 registry artifact to a local path.""" bucket, key = parse_s3_uri(artifact_uri) try: @@ -457,6 +474,7 @@ async def _download_s3_artifact(artifact_uri: str, output_path: Path) -> None: key=key, bucket=bucket, output_path=output_path, + expected_sha256=expected_sha256, ) except FileNotFoundError as e: request = httpx.Request("GET", artifact_uri) @@ -472,11 +490,28 @@ def compute_registry_artifact_cache_key(artifact_uri: str) -> str: """Compute the local cache key for a registry artifact URI.""" if not artifact_uri: return "base" - # S3 keys are case-sensitive, so preserve URI case when hashing. + # S3 keys and SHA-256 fragments are case-sensitive, so preserve URI case. content = artifact_uri.strip() return hashlib.sha256(content.encode()).hexdigest()[:16] +def registry_artifact_ref(artifact_uri: str, artifact_hash: str | None) -> str: + """Return an artifact reference carrying an optional expected SHA-256 hash.""" + if not artifact_hash: + return artifact_uri + return f"{artifact_uri}{ARTIFACT_SHA256_FRAGMENT_PREFIX}{artifact_hash}" + + +def split_registry_artifact_ref(artifact_ref: str) -> tuple[str, str | None]: + """Split an artifact reference into its storage URI and expected hash.""" + artifact_uri, separator, expected_hash = artifact_ref.partition( + ARTIFACT_SHA256_FRAGMENT_PREFIX + ) + if not separator: + return artifact_ref, None + return artifact_uri, expected_hash or None + + def bundled_builtin_registry_uri(version: str) -> str: """Return the pseudo-URI for the installed builtin registry package.""" return f"{BUNDLED_BUILTIN_REGISTRY_URI_PREFIX}{version}" @@ -561,19 +596,29 @@ async def ensure_environment(self, artifact_uri: str | None) -> list[Path]: if not artifact_uri: return [] cache_key = compute_registry_artifact_cache_key(artifact_uri) - return await self.materialize(cache_key, artifact_uri) + artifact_uri, expected_hash = split_registry_artifact_ref(artifact_uri) + return await self.materialize(cache_key, artifact_uri, expected_hash) - async def materialize(self, cache_key: str, artifact_uri: str) -> list[Path]: + async def materialize( + self, + cache_key: str, + artifact_uri: str, + expected_hash: str | None = None, + ) -> list[Path]: """Materialize a registry artifact as local importable directories.""" ctx = self._context_for(cache_key) - candidates = await self._artifact_candidates(ctx, artifact_uri) + candidates = await self._artifact_candidates(ctx, artifact_uri, expected_hash) if cached_paths := self._first_cached_path(candidates, ctx): return cached_paths lock = await self._lock_for(cache_key) async with lock: - candidates = await self._artifact_candidates(ctx, artifact_uri) + candidates = await self._artifact_candidates( + ctx, + artifact_uri, + expected_hash, + ) if cached_paths := self._first_cached_path(candidates, ctx): return cached_paths @@ -641,6 +686,7 @@ async def _artifact_candidates( self, ctx: RegistryArtifactMaterializationContext, artifact_uri: str, + expected_hash: str | None = None, ) -> list[RegistryArtifact]: """Return artifact candidates in executor preference order.""" if version := _bundled_builtin_registry_version(artifact_uri): @@ -658,9 +704,12 @@ async def _artifact_candidates( SquashfsArtifact( uri=artifact_uri, cache_key=ctx.cache_key, + expected_hash=expected_hash, ) ] - if tarball_uri := _tarball_uri_for_squashfs(artifact_uri): + if expected_hash is None and ( + tarball_uri := _tarball_uri_for_squashfs(artifact_uri) + ): candidates.append( TarballArtifact( uri=tarball_uri, @@ -672,12 +721,13 @@ async def _artifact_candidates( candidates: list[RegistryArtifact] = [] if self._can_try_squashfs(): squashfs_uri = _squashfs_sidecar_uri(artifact_uri) - if squashfs_uri: + if squashfs_uri and expected_hash is None: if ctx.paths.squashfs_image_path.exists(): candidates.append( SquashfsArtifact( uri=squashfs_uri, cache_key=ctx.cache_key, + expected_hash=None, ) ) elif await self._sidecar_exists( @@ -689,6 +739,7 @@ async def _artifact_candidates( SquashfsArtifact( uri=squashfs_uri, cache_key=ctx.cache_key, + expected_hash=None, ) ) @@ -696,6 +747,7 @@ async def _artifact_candidates( TarballArtifact( uri=artifact_uri, cache_key=ctx.cache_key, + expected_hash=expected_hash, ) ) return candidates diff --git a/tracecat/executor/service.py b/tracecat/executor/service.py index d8b0665991..7167043f2a 100644 --- a/tracecat/executor/service.py +++ b/tracecat/executor/service.py @@ -68,6 +68,10 @@ from tracecat.registry.actions.schemas import TemplateActionDefinition from tracecat.registry.actions.service import RegistryActionsService from tracecat.registry.constants import DEFAULT_REGISTRY_ORIGIN +from tracecat.registry.versions.schemas import ( + RegistryVersionManifest, + registry_manifest_fingerprint, +) from tracecat.secrets import secrets_manager from tracecat.secrets.common import apply_masks_object from tracecat.variables.schemas import VariableSearch @@ -90,6 +94,7 @@ class RegistryArtifactsContext: origin: str version: str artifact_uri: str + artifact_hash: str | None = None # Cache for individual artifacts (origin, version) -> RegistryArtifactsContext @@ -98,14 +103,60 @@ class RegistryArtifactsContext: def _artifact_cache_key( - origin: str, version: str, organization_id: OrganizationID + origin: str, + version: str, + organization_id: OrganizationID, + fingerprint: str | None = None, ) -> str: - return f"artifact:{organization_id}:{origin}:{version}" + return f"artifact:{organization_id}:{origin}:{version}:{fingerprint or ''}" + + +def _resolve_artifact_hash_for_lock( + *, + origin: str, + version: str, + manifest: Mapping[str, Any], + artifact_hash: str | None, + origin_fingerprints: dict[str, str] | None, +) -> str | None: + expected_fingerprint = ( + origin_fingerprints.get(origin) if origin_fingerprints else None + ) + if expected_fingerprint is None: + return artifact_hash + + # Workflow locks are the execution receipt. When a lock has an artifact hash, + # the database may tell us where that artifact lives, but it must not rewrite + # the locked hash from H1 to the current row's H2 after a repair or rebuild. + if artifact_hash is not None and expected_fingerprint == artifact_hash: + return expected_fingerprint + + manifest_fingerprint = registry_manifest_fingerprint( + RegistryVersionManifest.model_validate(manifest) + ) + if expected_fingerprint == manifest_fingerprint: + # Older locks only had manifest fingerprints. They cannot pin artifact + # bytes, so use the current row hash for download verification while + # still requiring the locked manifest identity to match. + return artifact_hash + + raise RegistryValidationError( + "Locked registry artifact fingerprint mismatch for " + f"origin {origin!r} version {version!r}: lock fingerprint does not match " + "the current artifact hash or manifest fingerprint", + key=origin, + err=( + f"lock_fingerprint={expected_fingerprint}, " + f"artifact_hash={artifact_hash}, " + f"manifest_fingerprint={manifest_fingerprint}" + ), + ) async def get_registry_artifacts_for_lock( origins: dict[str, str], organization_id: OrganizationID, + origin_fingerprints: dict[str, str] | None = None, ) -> list[RegistryArtifactsContext]: """Get registry tarball URIs for specific locked versions. @@ -132,7 +183,14 @@ async def get_registry_artifacts_for_lock( org_misses: list[tuple[str, str]] = [] for origin, version in origins.items(): - key = _artifact_cache_key(origin, version, organization_id) + key = _artifact_cache_key( + origin, + version, + organization_id, + fingerprint=origin_fingerprints.get(origin) + if origin_fingerprints + else None, + ) cached = await _artifact_cache.get(key=key) if cached is not None: cached_artifacts.append(cached) @@ -164,6 +222,8 @@ async def get_registry_artifacts_for_lock( PlatformRegistryRepository.origin, PlatformRegistryVersion.version, PlatformRegistryVersion.tarball_uri, + PlatformRegistryVersion.artifact_hash, + PlatformRegistryVersion.manifest, ) .join( PlatformRegistryVersion, @@ -188,6 +248,8 @@ async def get_registry_artifacts_for_lock( RegistryRepository.origin, RegistryVersion.version, RegistryVersion.tarball_uri, + RegistryVersion.artifact_hash, + RegistryVersion.manifest, ) .join( RegistryVersion, @@ -212,17 +274,32 @@ async def get_registry_artifacts_for_lock( # Process results found_keys: set[tuple[str, str]] = set() for row in rows: - origin_val, version_val, artifact_uri = row + origin_val, version_val, artifact_uri, artifact_hash, manifest = row if artifact_uri is not None: + resolved_artifact_hash = _resolve_artifact_hash_for_lock( + origin=origin_val, + version=version_val, + manifest=manifest, + artifact_hash=artifact_hash, + origin_fingerprints=origin_fingerprints, + ) artifact = RegistryArtifactsContext( origin=origin_val, version=version_val, artifact_uri=artifact_uri, + artifact_hash=resolved_artifact_hash, ) fetched_artifacts.append(artifact) found_keys.add((origin_val, version_val)) # Cache the artifact - key = _artifact_cache_key(origin_val, version_val, organization_id) + key = _artifact_cache_key( + origin_val, + version_val, + organization_id, + fingerprint=origin_fingerprints.get(origin_val) + if origin_fingerprints + else None, + ) await _artifact_cache.set(key=key, value=artifact) else: logger.warning( diff --git a/tracecat/registry/lock/service.py b/tracecat/registry/lock/service.py index ea98903da6..bc08d36ef2 100644 --- a/tracecat/registry/lock/service.py +++ b/tracecat/registry/lock/service.py @@ -72,6 +72,7 @@ async def resolve_lock_with_bindings( PlatformRegistryRepository.origin, PlatformRegistryVersion.version, PlatformRegistryVersion.manifest, + PlatformRegistryVersion.artifact_hash, ) .join( PlatformRegistryVersion, @@ -88,7 +89,7 @@ async def resolve_lock_with_bindings( builtin_version = next( ( str(version) - for origin, version, _ in platform_rows + for origin, version, _, _ in platform_rows if origin == DEFAULT_REGISTRY_ORIGIN ), None, @@ -109,6 +110,7 @@ async def resolve_lock_with_bindings( RegistryRepository.origin, RegistryVersion.version, RegistryVersion.manifest, + RegistryVersion.artifact_hash, ) .join( RegistryVersion, @@ -146,17 +148,21 @@ async def resolve_lock_with_bindings( origin_manifests: dict[str, RegistryVersionManifest] = {} excluded_custom_origin_manifests: dict[str, RegistryVersionManifest] = {} - for origin, version, manifest_dict in rows: + for origin, version, manifest_dict, artifact_hash in rows: origin_str = str(origin) origins[origin_str] = str(version) manifest = RegistryVersionManifest.model_validate(manifest_dict) origin_manifests[origin_str] = manifest - fingerprint = registry_manifest_fingerprint(manifest) + fingerprint = ( + str(artifact_hash) + if artifact_hash + else registry_manifest_fingerprint(manifest) + ) origin_fingerprints[origin_str] = fingerprint if origin_str == DEFAULT_REGISTRY_ORIGIN and builtin_fingerprint is None: builtin_fingerprint = fingerprint if not custom_registry_enabled: - for origin, _version, manifest_dict in org_rows: + for origin, _version, manifest_dict, _artifact_hash in org_rows: origin_str = str(origin) excluded_custom_origin_manifests[origin_str] = ( RegistryVersionManifest.model_validate(manifest_dict) diff --git a/tracecat/registry/lock/types.py b/tracecat/registry/lock/types.py index 2b5d9f0722..c731928c20 100644 --- a/tracecat/registry/lock/types.py +++ b/tracecat/registry/lock/types.py @@ -13,9 +13,9 @@ class RegistryLock(BaseModel): Example: {"tracecat_registry": "2024.12.10.123456"} actions: Maps action name to its source origin. Example: {"core.transform.reshape": "tracecat_registry"} - origin_fingerprints: Optional immutable manifest fingerprints for origins. - New executors use the builtin fingerprint to decide whether their - bundled tracecat_registry package is an exact match for the lock. + origin_fingerprints: Optional immutable origin fingerprints. New locks + prefer execution artifact SHA-256 hashes and fall back to manifest + fingerprints when older versions do not have artifact hashes. """ origins: dict[str, str] diff --git a/tracecat/registry/repositories/schemas.py b/tracecat/registry/repositories/schemas.py index 3f7caaa848..3e7eee992f 100644 --- a/tracecat/registry/repositories/schemas.py +++ b/tracecat/registry/repositories/schemas.py @@ -178,6 +178,7 @@ class RegistryVersionRead(BaseModel): version: str commit_sha: str | None tarball_uri: str | None + artifact_hash: str | None = None created_at: datetime model_config = {"from_attributes": True} diff --git a/tracecat/registry/sync/artifact.py b/tracecat/registry/sync/artifact.py index d787b2a422..01f0986d27 100644 --- a/tracecat/registry/sync/artifact.py +++ b/tracecat/registry/sync/artifact.py @@ -46,6 +46,9 @@ class RegistryArtifactBuildResult: artifact_size_bytes: int +_REPRODUCIBLE_SQUASHFS_TIMESTAMP = "0" + + def _compute_file_hash(file_path: Path) -> str: """Compute SHA256 hash of a file.""" sha256_hash = hashlib.sha256() @@ -96,7 +99,7 @@ def _remove_existing_entry() -> None: if dest.is_symlink() or dest.is_file(): dest.unlink() dest.mkdir(parents=True, exist_ok=True) - for child in path.iterdir(): + for child in sorted(path.iterdir(), key=lambda child: child.name): _copy_squashfs_entry( child, dest / child.name, @@ -143,7 +146,7 @@ def _create_squashfs_image( # TemporaryDirectory creates a 0700 root, which would make the mounted # SquashFS unreadable to non-root executor processes after -all-root. staging_dir.chmod(0o755) - for path, arcname in entries: + for path, arcname in sorted(entries, key=lambda entry: entry[1]): _copy_squashfs_entry( path, staging_dir / arcname, @@ -157,8 +160,20 @@ def _create_squashfs_image( "-noappend", "-comp", "gzip", + "-Xcompression-level", + "6", "-no-xattrs", "-all-root", + "-root-mode", + "755", + "-reproducible", + "-mkfs-time", + _REPRODUCIBLE_SQUASHFS_TIMESTAMP, + "-all-time", + _REPRODUCIBLE_SQUASHFS_TIMESTAMP, + "-root-time", + _REPRODUCIBLE_SQUASHFS_TIMESTAMP, + "-no-hardlinks", "-processors", str(config.TRACECAT__REGISTRY_SYNC_SQUASHFS_PROCESSORS), "-mem", diff --git a/tracecat/registry/sync/base_service.py b/tracecat/registry/sync/base_service.py index 0d21204f36..ae138e926a 100644 --- a/tracecat/registry/sync/base_service.py +++ b/tracecat/registry/sync/base_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import hashlib import uuid from collections.abc import AsyncGenerator from contextlib import asynccontextmanager @@ -38,7 +39,10 @@ build_builtin_registry_artifact, upload_squashfs_venv, ) -from tracecat.registry.sync.prebuilt import load_prebuilt_builtin_registry_manifest +from tracecat.registry.sync.prebuilt import ( + load_prebuilt_builtin_registry_artifact_metadata, + load_prebuilt_builtin_registry_manifest, +) from tracecat.registry.sync.schemas import RegistrySyncRequest from tracecat.registry.sync.subprocess import fetch_actions_from_subprocess from tracecat.registry.versions.schemas import ( @@ -54,6 +58,8 @@ from tracecat.ssh import SshEnv +READ_HASH_CHUNK_SIZE_BYTES = 1024 * 1024 + class RepositoryProtocol(Protocol): id: Mapped[uuid.UUID] @@ -65,6 +71,7 @@ class VersionProtocol(Protocol): id: Mapped[uuid.UUID] version: Mapped[str] tarball_uri: Mapped[str] + artifact_hash: Mapped[str | None] manifest: Mapped[dict[str, object]] @@ -101,6 +108,7 @@ class ArtifactsBuildResult: """Result of building and uploading execution artifacts.""" artifact_uri: str + artifact_hash: str | None = None @dataclass @@ -110,6 +118,7 @@ class BaseSyncResult[VersionT: VersionProtocol]: version: VersionT actions: list[RegistryActionCreate] artifact_uri: str + artifact_hash: str | None = None commit_sha: str | None = None @property @@ -121,6 +130,27 @@ def num_actions(self) -> int: return len(self.actions) +async def _compute_existing_artifact_hash(*, key: str, bucket: str) -> str: + async with aiofiles.tempfile.TemporaryDirectory( + prefix="tracecat_registry_existing_artifact_" + ) as temp_dir: + artifact_path = Path(temp_dir) / Path(key).name + await blob.download_file_to_path( + key=key, + bucket=bucket, + output_path=artifact_path, + ) + return await _compute_file_sha256(artifact_path) + + +async def _compute_file_sha256(path: Path) -> str: + hasher = hashlib.sha256() + async with aiofiles.open(path, "rb") as f: + while chunk := await f.read(READ_HASH_CHUNK_SIZE_BYTES): + hasher.update(chunk) + return hasher.hexdigest() + + class BaseRegistrySyncService[ RepoT: RepositoryProtocol, VersionT: VersionProtocol, @@ -347,10 +377,16 @@ async def sync_repository_v2( ) # Prefer release-built builtin metadata; fall back to subprocess discovery. + storage_namespace = self._get_storage_namespace() prebuilt_manifest = load_prebuilt_builtin_registry_manifest( origin=origin, target_version=target_version, - storage_namespace=self._get_storage_namespace(), + storage_namespace=storage_namespace, + ) + prebuilt_artifact_metadata = load_prebuilt_builtin_registry_artifact_metadata( + origin=origin, + target_version=target_version, + storage_namespace=storage_namespace, ) actions: list[RegistryActionCreate] | None = None manifest: RegistryVersionManifest | None = None @@ -436,6 +472,7 @@ async def sync_repository_v2( version=existing_version, actions=actions, artifact_uri=existing_artifact_uri, + artifact_hash=existing_version.artifact_hash, commit_sha=commit_sha, ) @@ -444,7 +481,10 @@ async def sync_repository_v2( artifact_uri=self._artifact_uri_for_version( origin=origin, version_string=target_version, - ) + ), + artifact_hash=prebuilt_artifact_metadata.artifact_hash + if prebuilt_artifact_metadata + else None, ) else: artifacts = await self._build_and_upload_artifacts( @@ -460,6 +500,7 @@ async def sync_repository_v2( commit_sha=commit_sha, manifest=manifest, tarball_uri=artifacts.artifact_uri, + artifact_hash=artifacts.artifact_hash, ) version = await versions_service.create_version(version_create, commit=False) @@ -496,6 +537,7 @@ async def sync_repository_v2( version=version, actions=actions, artifact_uri=artifacts.artifact_uri, + artifact_hash=artifacts.artifact_hash, commit_sha=commit_sha, ) @@ -549,7 +591,14 @@ async def _build_and_upload_artifacts( "Using existing registry execution artifact", artifact_uri=artifact_uri, ) - return ArtifactsBuildResult(artifact_uri=artifact_uri) + artifact_hash = await _compute_existing_artifact_hash( + key=artifact_key, + bucket=bucket, + ) + return ArtifactsBuildResult( + artifact_uri=artifact_uri, + artifact_hash=artifact_hash, + ) async with aiofiles.tempfile.TemporaryDirectory( prefix="tracecat_registry_artifact_" @@ -594,10 +643,14 @@ async def _build_and_upload_artifacts( self.logger.info( "Registry execution artifact uploaded", artifact_uri=artifact_uri, + artifact_hash=artifact_result.content_hash, artifact_size_bytes=artifact_result.artifact_size_bytes, ) - return ArtifactsBuildResult(artifact_uri=artifact_uri) + return ArtifactsBuildResult( + artifact_uri=artifact_uri, + artifact_hash=artifact_result.content_hash, + ) def _generate_version_string( self, @@ -751,6 +804,7 @@ async def _sync_via_temporal_workflow( actions = workflow_result.actions self._raise_if_validation_errors(workflow_result.validation_errors) artifact_uri = workflow_result.artifact_uri + artifact_hash = workflow_result.artifact_hash commit_sha = workflow_result.commit_sha if not actions: @@ -794,6 +848,7 @@ async def _sync_via_temporal_workflow( version=existing_version, actions=actions, artifact_uri=existing_artifact_uri, + artifact_hash=existing_version.artifact_hash, commit_sha=commit_sha, ) @@ -803,6 +858,7 @@ async def _sync_via_temporal_workflow( commit_sha=commit_sha, manifest=manifest, tarball_uri=artifact_uri, + artifact_hash=artifact_hash, ) version = await versions_service.create_version(version_create, commit=False) @@ -839,5 +895,6 @@ async def _sync_via_temporal_workflow( version=version, actions=actions, artifact_uri=artifact_uri, + artifact_hash=artifact_hash, commit_sha=commit_sha, ) diff --git a/tracecat/registry/sync/jobs.py b/tracecat/registry/sync/jobs.py index 57eb980a86..e174ee9479 100644 --- a/tracecat/registry/sync/jobs.py +++ b/tracecat/registry/sync/jobs.py @@ -337,14 +337,16 @@ async def _build_platform_registry_artifact( "Platform registry artifact build completed", target_version=target_version, artifact_uri=result.artifact_uri, + artifact_hash=result.artifact_hash, ) if promote_version_id is not None: await _promote_platform_registry_version_after_artifact_build( session, target_version=target_version, version_id=promote_version_id, - artifact_uri=result.artifact_uri, expected_current_version_id=expected_current_version_id, + artifact_uri=result.artifact_uri, + artifact_hash=result.artifact_hash, ) @@ -353,8 +355,9 @@ async def _promote_platform_registry_version_after_artifact_build( *, target_version: str, version_id: UUID, - artifact_uri: str, expected_current_version_id: UUID | None, + artifact_uri: str, + artifact_hash: str | None, ) -> None: repos_service = PlatformRegistryReposService(session) versions_service = PlatformRegistryVersionsService(session) @@ -411,9 +414,16 @@ async def _promote_platform_registry_version_after_artifact_build( version.tarball_uri = artifact_uri session.add(version) - await repos_service.promote_version(repo, version_id) + version.tarball_uri = artifact_uri + if artifact_hash is not None: + version.artifact_hash = artifact_hash + session.add(version) + repo.current_version_id = version.id + session.add(repo) + await session.commit() logger.info( "Promoted platform registry version after artifact build", target_version=target_version, version_id=str(version_id), + artifact_hash=version.artifact_hash, ) diff --git a/tracecat/registry/sync/prebuild.py b/tracecat/registry/sync/prebuild.py index db530fb43b..f61ad3ab2a 100644 --- a/tracecat/registry/sync/prebuild.py +++ b/tracecat/registry/sync/prebuild.py @@ -14,8 +14,13 @@ DEFAULT_REGISTRY_ORIGIN, PLATFORM_REGISTRY_NAMESPACE, ) +from tracecat.registry.sync.artifact import build_builtin_registry_artifact from tracecat.registry.sync.entrypoint import load_and_serialize_actions -from tracecat.registry.sync.prebuilt import write_prebuilt_registry_manifest +from tracecat.registry.sync.prebuilt import ( + PrebuiltRegistryArtifactMetadata, + write_prebuilt_registry_artifact_metadata, + write_prebuilt_registry_manifest, +) from tracecat.registry.versions.schemas import RegistryVersionManifest PREBUILD_REPOSITORY_ID = UUID("00000000-0000-4000-8000-000000000000") @@ -44,6 +49,15 @@ async def prebuild_builtin_registry_manifest(output_root: Path | None = None) -> ) manifest = RegistryVersionManifest.from_actions(sync_result.actions) write_prebuilt_registry_manifest(artifact_dir=output_dir, manifest=manifest) + artifact = await build_builtin_registry_artifact(output_dir=output_dir) + write_prebuilt_registry_artifact_metadata( + artifact_dir=output_dir, + metadata=PrebuiltRegistryArtifactMetadata( + artifact_hash=artifact.content_hash, + artifact_size_bytes=artifact.artifact_size_bytes, + ), + ) + artifact.squashfs_path.unlink(missing_ok=True) return output_dir diff --git a/tracecat/registry/sync/prebuilt.py b/tracecat/registry/sync/prebuilt.py index 3f715d731a..034312a2d5 100644 --- a/tracecat/registry/sync/prebuilt.py +++ b/tracecat/registry/sync/prebuilt.py @@ -4,7 +4,7 @@ from pathlib import Path -from pydantic import ValidationError +from pydantic import BaseModel, ValidationError from tracecat import config from tracecat.logger import logger @@ -13,6 +13,14 @@ from tracecat.registry.versions.schemas import RegistryVersionManifest PREBUILT_MANIFEST_FILENAME = "manifest.json" +PREBUILT_ARTIFACT_METADATA_FILENAME = "artifact.json" + + +class PrebuiltRegistryArtifactMetadata(BaseModel): + """Release-built metadata for the builtin registry execution artifact.""" + + artifact_hash: str + artifact_size_bytes: int def _prebuilt_root() -> Path: @@ -39,6 +47,11 @@ def get_prebuilt_registry_manifest_path(*, root: Path, prefix: str) -> Path: return root / prefix / PREBUILT_MANIFEST_FILENAME +def get_prebuilt_registry_artifact_metadata_path(*, root: Path, prefix: str) -> Path: + """Return the artifact metadata path for a deterministic artifact prefix.""" + return root / prefix / PREBUILT_ARTIFACT_METADATA_FILENAME + + def write_prebuilt_registry_manifest( *, artifact_dir: Path, @@ -51,6 +64,18 @@ def write_prebuilt_registry_manifest( return manifest_path +def write_prebuilt_registry_artifact_metadata( + *, + artifact_dir: Path, + metadata: PrebuiltRegistryArtifactMetadata, +) -> Path: + """Write compact release-built artifact identity metadata.""" + artifact_dir.mkdir(parents=True, exist_ok=True) + metadata_path = artifact_dir / PREBUILT_ARTIFACT_METADATA_FILENAME + metadata_path.write_text(metadata.model_dump_json()) + return metadata_path + + def load_prebuilt_registry_manifest( *, root: Path, @@ -71,6 +96,31 @@ def load_prebuilt_registry_manifest( return None +def load_prebuilt_registry_artifact_metadata( + *, + root: Path, + prefix: str, +) -> PrebuiltRegistryArtifactMetadata | None: + """Load release-built artifact identity metadata when present.""" + metadata_path = get_prebuilt_registry_artifact_metadata_path( + root=root, + prefix=prefix, + ) + if not metadata_path.exists(): + return None + try: + return PrebuiltRegistryArtifactMetadata.model_validate_json( + metadata_path.read_text() + ) + except (OSError, ValueError, ValidationError) as e: + logger.warning( + "Ignoring invalid prebuilt registry artifact metadata", + metadata_path=str(metadata_path), + error=str(e), + ) + return None + + def load_prebuilt_builtin_registry_manifest( *, origin: str, @@ -90,3 +140,24 @@ def load_prebuilt_builtin_registry_manifest( root=_prebuilt_root(), prefix=prefix, ) + + +def load_prebuilt_builtin_registry_artifact_metadata( + *, + origin: str, + target_version: str | None, + storage_namespace: str, +) -> PrebuiltRegistryArtifactMetadata | None: + """Load release-built builtin registry artifact metadata when available.""" + prefix = _builtin_artifact_prefix( + origin=origin, + target_version=target_version, + storage_namespace=storage_namespace, + ) + if prefix is None: + return None + + return load_prebuilt_registry_artifact_metadata( + root=_prebuilt_root(), + prefix=prefix, + ) diff --git a/tracecat/registry/sync/runner.py b/tracecat/registry/sync/runner.py index 7f96b037f1..5b320aeee7 100644 --- a/tracecat/registry/sync/runner.py +++ b/tracecat/registry/sync/runner.py @@ -258,6 +258,7 @@ async def run(self, request: RegistrySyncRequest) -> RegistrySyncResult: return RegistrySyncResult( actions=actions, artifact_uri=artifact_uri, + artifact_hash=artifact_result.content_hash, commit_sha=commit_sha, validation_errors=validation_errors, ) diff --git a/tracecat/registry/sync/schemas.py b/tracecat/registry/sync/schemas.py index e62febf6b2..46e89784a5 100644 --- a/tracecat/registry/sync/schemas.py +++ b/tracecat/registry/sync/schemas.py @@ -128,6 +128,10 @@ class RegistrySyncResult(BaseModel): default=None, description="Resolved commit SHA (None for builtin/local repos)", ) + artifact_hash: str | None = Field( + default=None, + description="SHA-256 content hash of the uploaded execution artifact", + ) validation_errors: dict[str, list[RegistryActionValidationErrorInfo]] = Field( default_factory=dict, description="Map of action name to validation errors", diff --git a/tracecat/registry/versions/schemas.py b/tracecat/registry/versions/schemas.py index 3f824ba68e..17f51d6ce4 100644 --- a/tracecat/registry/versions/schemas.py +++ b/tracecat/registry/versions/schemas.py @@ -161,6 +161,13 @@ class RegistryVersionCreate(BaseModel): ..., description="S3 URI to the execution artifact for this registry version", ) + artifact_hash: str | None = Field( + default=None, + min_length=64, + max_length=64, + pattern=r"^[a-fA-F0-9]{64}$", + description="SHA-256 content hash of the execution artifact", + ) class RegistryVersionRead(BaseModel): @@ -172,6 +179,7 @@ class RegistryVersionRead(BaseModel): commit_sha: str | None manifest: RegistryVersionManifest tarball_uri: str + artifact_hash: str | None = None created_at: datetime @@ -183,6 +191,7 @@ class RegistryVersionReadMinimal(BaseModel): version: str commit_sha: str | None tarball_uri: str + artifact_hash: str | None = None created_at: datetime action_count: int = Field( default=0, diff --git a/tracecat/registry/versions/service.py b/tracecat/registry/versions/service.py index 47aa140469..07e49491da 100644 --- a/tracecat/registry/versions/service.py +++ b/tracecat/registry/versions/service.py @@ -110,6 +110,7 @@ async def create_version( commit_sha=params.commit_sha, manifest=to_jsonable_python(params.manifest), tarball_uri=params.tarball_uri, + artifact_hash=params.artifact_hash, ) self.session.add(version) if commit: @@ -460,6 +461,7 @@ async def create_version( commit_sha=params.commit_sha, manifest=to_jsonable_python(params.manifest), tarball_uri=params.tarball_uri, + artifact_hash=params.artifact_hash, ) self.session.add(version) if commit: