From 22e631dc92b366d0c898cc31891412c1343757a8 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 17 Jun 2026 14:23:44 +1000 Subject: [PATCH 1/3] feat(regression): add R2 write backend for native baselines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the Cloudflare R2 (S3-compatible) write backend so `ref test-cases mint` can upload native baseline blobs to the shared object store, not just a local path. Blobs are content-addressed with a flat, digest-keyed layout, so each is served publicly at `{url}/{digest}` and reused across cases and re-mints. The S3 endpoint and bucket are non-secret config (defaulting to the production R2 bucket); write credentials are resolved at upload time only — explicit `REF_NATIVE_STORE_ACCESS_KEY_ID` / `REF_NATIVE_STORE_SECRET_ACCESS_KEY`, else a named `REF_NATIVE_STORE_PROFILE`, else boto3's default credential chain — so secrets never land in the persisted config. boto3 is an optional `aws` extra; the read and replay paths never need it. Harden the minting developer experience: - `mint` preflights the store (credentials and bucket reachability) and fails fast with an actionable message before running any diagnostics. - add `ref test-cases check-store` to verify connectivity without minting, and `mint --dry-run` to preview scope after the preflight. - `ref test-cases fetch` now regenerates the gitignored `catalog.paths.yaml` when it is missing even if the committed catalog is unchanged, so a plain fetch works on a fresh checkout (no `--force` needed). 
--- packages/climate-ref-core/pyproject.toml | 7 + .../climate_ref_core/regression/__init__.py | 2 + .../src/climate_ref_core/regression/store.py | 397 ++++++++++++++++-- .../src/climate_ref_core/testing.py | 117 ++++-- .../tests/unit/regression/test_store.py | 273 +++++++++++- .../tests/unit/test_testing.py | 28 ++ .../src/climate_ref/cli/__init__.py | 1 + .../src/climate_ref/cli/test_cases.py | 76 +++- .../climate-ref/src/climate_ref/config.py | 28 +- .../tests/unit/cli/test_test_cases.py | 89 ++++ .../climate-ref/tests/unit/test_config.py | 3 +- pyproject.toml | 3 + uv.lock | 121 ++++++ 13 files changed, 1050 insertions(+), 95 deletions(-) diff --git a/packages/climate-ref-core/pyproject.toml b/packages/climate-ref-core/pyproject.toml index 6233bbab6..4bcf825b1 100644 --- a/packages/climate-ref-core/pyproject.toml +++ b/packages/climate-ref-core/pyproject.toml @@ -56,6 +56,13 @@ dependencies = [ "fastprogress==1.0.5" ] +[project.optional-dependencies] +# Credentialed S3-compatible write backend for the native baseline store (Cloudflare R2). +# Only required for the `ref test-cases mint` upload path; read/replay never need it. 
+aws = [ + "boto3>=1.34", +] + [dependency-groups] dev = [ "types-requests", diff --git a/packages/climate-ref-core/src/climate_ref_core/regression/__init__.py b/packages/climate-ref-core/src/climate_ref_core/regression/__init__.py index d92acc1ca..e9d7ca5c6 100644 --- a/packages/climate-ref-core/src/climate_ref_core/regression/__init__.py +++ b/packages/climate-ref-core/src/climate_ref_core/regression/__init__.py @@ -41,6 +41,7 @@ from climate_ref_core.regression.store import ( LocalFilesystemStore, NativeStore, + NativeStoreUnavailableError, PoochReadStore, R2WriteStore, build_native_store, @@ -55,6 +56,7 @@ "Manifest", "NativeEntry", "NativeStore", + "NativeStoreUnavailableError", "PoochReadStore", "R2WriteStore", "Tolerance", diff --git a/packages/climate-ref-core/src/climate_ref_core/regression/store.py b/packages/climate-ref-core/src/climate_ref_core/regression/store.py index 355416f96..48b70eaab 100644 --- a/packages/climate-ref-core/src/climate_ref_core/regression/store.py +++ b/packages/climate-ref-core/src/climate_ref_core/regression/store.py @@ -8,22 +8,32 @@ - :class:`PoochReadStore`: anonymous public-read store backed by a URL, using :mod:`pooch` for caching, retry, and hash verification. Write is intentionally unsupported (``put`` raises :class:`NotImplementedError`). -- :class:`R2WriteStore`: stub for the future Cloudflare R2 write backend. - Construction raises :class:`NotImplementedError` until implemented. +- :class:`R2WriteStore`: credentialed S3-compatible write backend for Cloudflare R2. + Used by the ``mint`` verb to upload native blobs; reads go through + :class:`PoochReadStore` against the public read URL. The factory :func:`build_native_store` selects the appropriate implementation based on the application :class:`~climate_ref.config.Config` and the ``writable`` flag. ``writable=False`` never requires credentials. 
+Write credentials are **never** read from the persisted config: the S3 endpoint and +bucket are non-secret routing config, while authentication is resolved at client-build time +only, in precedence order: explicit ``REF_NATIVE_STORE_ACCESS_KEY_ID`` / +``REF_NATIVE_STORE_SECRET_ACCESS_KEY`` env vars, then a named ``REF_NATIVE_STORE_PROFILE``, +then boto3's default credential chain (which honours an ambient ``AWS_PROFILE``). + Blobs are keyed by their **sha256 hex digest**. The :class:`LocalFilesystemStore` uses a two-level directory layout ``<root>/<digest[:2]>/<digest>`` similar to git's object storage. +The remote stores (:class:`PoochReadStore`, :class:`R2WriteStore`) use a flat layout +(object key == digest), so a blob is served at ``{public_url}/{digest}``. """ +import os import shutil from functools import cache from pathlib import Path -from typing import Protocol, runtime_checkable +from typing import Any, Protocol, runtime_checkable from urllib.parse import unquote, urlsplit import pooch @@ -34,6 +44,26 @@ from .manifest import _validate_digest, sha256_file +# S3 error codes / HTTP status that denote a missing object on a HEAD/GET. +_MISSING_OBJECT_CODES = ("404", "NoSuchKey", "NotFound") +_HTTP_NOT_FOUND = 404 +_HTTP_BAD_REQUEST = 400 +_HTTP_UNAUTHORIZED = 401 +_HTTP_FORBIDDEN = 403 +# On a trivial preflight HEAD (bucket + sentinel key + signature), these all mean the +# credentials could not be authenticated: malformed key (400), unknown/revoked key (401). +_AUTH_REJECTED_STATUSES = (_HTTP_BAD_REQUEST, _HTTP_UNAUTHORIZED) + + +class NativeStoreUnavailableError(RuntimeError): + """ + Raised when a native store cannot be reached or used. + + Covers rejected credentials, a missing bucket, or an unwritable local directory. + The message is operator-facing and actionable (it names the env vars / path to check), + so callers can surface it directly.
+ """ + @cache def _pooch_manager(base_url: str, cache_dir: str) -> pooch.Pooch: @@ -52,6 +82,64 @@ def _pooch_manager(base_url: str, cache_dir: str) -> pooch.Pooch: ) +@cache +def _s3_client(endpoint_url: str, access_key_id: str, secret_access_key: str, profile: str) -> Any: + """ + Build (and cache) an S3-compatible client for a Cloudflare R2 endpoint. + + boto3 is imported lazily so the read/replay paths (and any environment without the + optional ``aws`` extra installed) never pull in boto3. The client is cached by its + immutable inputs so many ``put`` calls in a single ``mint`` run reuse one client. + + Authentication precedence (each empty value falls through to the next): + + 1. Explicit ``access_key_id`` / ``secret_access_key`` (from the REF cred env vars). + 2. A named ``profile`` from ``~/.aws/{config,credentials}``. + 3. boto3's default credential chain (ambient ``AWS_PROFILE`` / ``AWS_ACCESS_KEY_ID`` / + instance profile, etc.). + + R2 requires SigV4 and a fixed ``auto`` region; path-style addressing avoids virtual-host + DNS requirements against the account endpoint. + + Parameters + ---------- + endpoint_url + The S3 API endpoint of the R2 bucket's account + (e.g. ``https://<account-id>.eu.r2.cloudflarestorage.com``), without the bucket. + access_key_id + R2 access-key id, or ``""`` to fall through to the profile / default chain. + secret_access_key + R2 secret-access-key, or ``""`` to fall through to the profile / default chain. + profile + Named AWS/R2 profile to load credentials from, or ``""`` for the default session + (which still honours an ambient ``AWS_PROFILE``). + + Returns + ------- + : + A configured boto3 S3 client (typed ``Any``; boto3 ships no inline types).
+ """ + try: + import boto3 # noqa: PLC0415 - optional dependency, imported lazily + from botocore.config import Config as BotoConfig # noqa: PLC0415 - optional dependency + except ImportError as exc: # pragma: no cover - exercised only without the extra + raise ImportError( + "Minting to a remote native store requires boto3, which is an optional " + "dependency. Install it with the 'aws' extra, e.g. " + "`uv pip install 'climate-ref-core[aws]'`." + ) from exc + + session = boto3.Session(profile_name=profile or None) + return session.client( + "s3", + endpoint_url=endpoint_url, + aws_access_key_id=access_key_id or None, + aws_secret_access_key=secret_access_key or None, + region_name="auto", + config=BotoConfig(signature_version="s3v4", s3={"addressing_style": "path"}), + ) + + @runtime_checkable class NativeStore(Protocol): """ @@ -115,6 +203,22 @@ def put(self, path: Path) -> str: """ ... + def preflight(self) -> None: + """ + Verify the store is reachable and usable before relying on it. + + For writable stores this checks the credentials and target (bucket, or that the local + directory is writable); for anonymous read-only stores it may be a no-op. Intended to + be called once up front (e.g. before a slow ``mint`` run) so a misconfiguration is + caught early. + + Raises + ------ + NativeStoreUnavailableError + If the store cannot be reached or used, with an operator-facing message. + """ + ... + @frozen class LocalFilesystemStore: @@ -219,6 +323,25 @@ def put(self, path: Path) -> str: logger.debug(f"LocalFilesystemStore.put: {digest} already present, skipping copy") return digest + def preflight(self) -> None: + """ + Verify the store root exists (creating it if needed) and is writable. + + Raises + ------ + NativeStoreUnavailableError + If the root cannot be created or is not writable. 
+ """ + try: + self.root.mkdir(parents=True, exist_ok=True) + except OSError as exc: + raise NativeStoreUnavailableError( + f"Local native store root {self.root} could not be created: {exc}" + ) from exc + if not os.access(self.root, os.W_OK): + raise NativeStoreUnavailableError(f"Local native store root {self.root} is not writable.") + logger.debug(f"Local native store ready at {self.root}") + @frozen class PoochReadStore: @@ -308,37 +431,230 @@ def put(self, path: Path) -> str: "Use a writable store (LocalFilesystemStore or R2WriteStore) for minting." ) + def preflight(self) -> None: + """ + No-op: an anonymous public-read store has nothing to verify up front. + + It has no credentials, and every read is hash-checked per blob; this exists only to + satisfy the :class:`NativeStore` protocol. + """ + return None + @frozen class R2WriteStore: """ - Stub for the future Cloudflare R2 write backend. + Credentialed S3-compatible write backend for a Cloudflare R2 bucket. - This class documents the seam for the credentialed write backend - and will be implemented in a follow-up once the R2 credentials and - bucket lifecycle policy are in place. + Used by the ``mint`` verb to upload native blobs. Reads in CI and for local replay go + through :class:`PoochReadStore` against the public read URL, so this store's ``fetch`` / + ``has`` exist mainly for mint-time idempotence and verification. - Construction raises :class:`NotImplementedError` with a deferral message. + Blobs are content-addressed with a **flat** key layout (object key == ``key_prefix`` + + digest), so a blob is served at ``{public_url}/{key_prefix}{digest}``. The public read + domain is expected to map to the bucket root, so ``key_prefix`` defaults to ``""``. + + boto3 is imported lazily (see :func:`_s3_client`); constructing this store does not + require boto3, only the endpoint and bucket. Credentials are passed in explicitly and + are never sourced from the persisted config. 
+ + Parameters + ---------- + endpoint_url + S3 API endpoint for the bucket's account, without the bucket + (e.g. ``https://.eu.r2.cloudflarestorage.com``). + bucket + Name of the R2 bucket (e.g. ``ref-baselines``). + access_key_id + R2 access-key id, or ``""`` to fall through to ``profile`` / boto3's default chain. + secret_access_key + R2 secret-access-key, or ``""`` to fall through to ``profile`` / boto3's default chain. + profile + Named AWS/R2 profile to authenticate with, or ``""`` for the default session. + Ignored when explicit ``access_key_id`` / ``secret_access_key`` are supplied. + key_prefix + Optional object-key prefix. Defaults to ``""`` (flat, bucket-root layout). """ + endpoint_url: str + bucket: str + access_key_id: str = "" + secret_access_key: str = "" + profile: str = "" + key_prefix: str = "" + def __attrs_post_init__(self) -> None: - raise NotImplementedError( - "Remote writable native store (R2 backend) is deferred to a follow-up PR. " - "Anonymous public-read URL + credentialed S3-compatible PUT will be wired here. " - "Use a local store URL (file:// or a filesystem path) for minting in the meantime." - ) + """Fail fast at construction (mint startup) when routing config is missing.""" + if not self.endpoint_url: + raise ValueError( + "R2 native store requires an S3 endpoint URL; set REF_NATIVE_STORE_S3_ENDPOINT_URL " + "(e.g. https://.eu.r2.cloudflarestorage.com)." + ) + if not self.bucket: + raise ValueError( + "R2 native store requires a bucket name; set REF_NATIVE_STORE_BUCKET (e.g. ref-baselines)." + ) + + def _key(self, digest: str) -> str: + """Return the object key for a blob, validating the digest first. + + The digest is validated as 64-character lowercase hex, so a malformed or hostile + digest cannot inject an unexpected object key. 
+ """ + _validate_digest(digest) + return f"{self.key_prefix}{digest}" + + def _client(self) -> Any: + """Return the cached boto3 S3 client for this store's endpoint and credentials.""" + return _s3_client(self.endpoint_url, self.access_key_id, self.secret_access_key, self.profile) + + @staticmethod + def _is_missing(exc: Exception) -> bool: + """Return ``True`` when a botocore ``ClientError`` denotes a missing object (404).""" + response = getattr(exc, "response", None) + if not isinstance(response, dict): + return False + code = response.get("Error", {}).get("Code") + status = response.get("ResponseMetadata", {}).get("HTTPStatusCode") + return code in _MISSING_OBJECT_CODES or status == _HTTP_NOT_FOUND + + def has(self, digest: str) -> bool: + """ + Return ``True`` if the blob is present in the bucket. - def has(self, digest: str) -> bool: # pragma: no cover - """Not implemented — R2 backend deferred.""" - raise NotImplementedError("R2 backend deferred") + Parameters + ---------- + digest + The sha256 hex digest of the blob. + + Returns + ------- + : + ``True`` when a ``HEAD`` on the object succeeds, ``False`` on a 404. + """ + from botocore.exceptions import ClientError # noqa: PLC0415 - optional dependency + + try: + self._client().head_object(Bucket=self.bucket, Key=self._key(digest)) + except ClientError as exc: + if self._is_missing(exc): + return False + raise + return True + + def fetch(self, digest: str, dest: Path) -> None: + """ + Download the blob to ``dest`` and verify its sha256 matches ``digest``. + + Parameters + ---------- + digest + The sha256 hex digest of the blob to fetch. + dest + Destination path to write the blob to. + Parent directories are created if they do not exist. + + Raises + ------ + FileNotFoundError + If the blob is not present in the bucket. + ValueError + If the downloaded blob's sha256 does not match ``digest``. 
+ """ + from botocore.exceptions import ClientError # noqa: PLC0415 - optional dependency + + dest.parent.mkdir(parents=True, exist_ok=True) + try: + self._client().download_file(self.bucket, self._key(digest), str(dest)) + except ClientError as exc: + if self._is_missing(exc): + raise FileNotFoundError(f"Blob {digest!r} not found in R2 bucket {self.bucket!r}") from exc + raise + _verify_hash_matches(dest, digest) + logger.debug(f"R2WriteStore.fetch: {digest} -> {dest}") + + def put(self, path: Path) -> str: + """ + Upload the file at ``path`` to the bucket and return its sha256 hex digest. + + The blob is content-addressed: the upload is skipped when an object with the same + digest already exists, so minting is idempotent and re-mints are cheap. + + Parameters + ---------- + path + Path to the file to store. + + Returns + ------- + : + The sha256 hex digest of the stored blob. + """ + digest = sha256_file(path) + if self.has(digest): + logger.debug(f"R2WriteStore.put: {digest} already present, skipping upload") + return digest + self._client().upload_file(str(path), self.bucket, self._key(digest)) + logger.debug(f"R2WriteStore.put: {path} -> s3://{self.bucket}/{self._key(digest)}") + return digest + + @staticmethod + def _http_status(exc: Exception) -> int | None: + """Return the HTTP status code from a botocore ``ClientError``, if present.""" + response = getattr(exc, "response", None) + if isinstance(response, dict): + status = response.get("ResponseMetadata", {}).get("HTTPStatusCode") + if isinstance(status, int): + return status + return None + + def preflight(self) -> None: + """ + Verify the bucket is reachable and the credentials are accepted, before any upload. - def fetch(self, digest: str, dest: Path) -> None: # pragma: no cover - """Not implemented — R2 backend deferred.""" - raise NotImplementedError("R2 backend deferred") + Performs a cheap authenticated ``HEAD`` on a sentinel key (expected to be absent). 
A + ``404`` means the request authenticated and the store is usable; ``401`` / ``403`` are + translated into actionable :class:`NativeStoreUnavailableError` messages so a + misconfigured credential is caught before the (slow) diagnostic run rather than after. - def put(self, path: Path) -> str: # pragma: no cover - """Not implemented — R2 backend deferred.""" - raise NotImplementedError("R2 backend deferred") + ``head_object`` is used rather than ``head_bucket`` so the check works with + least-privilege, object-scoped tokens (which cannot perform bucket-level operations). + + Raises + ------ + NativeStoreUnavailableError + If the credentials are rejected (401), access is denied (403), or the probe + otherwise fails. + """ + from botocore.exceptions import ClientError # noqa: PLC0415 - optional dependency + + probe_key = f"{self.key_prefix}.ref-preflight-probe" + try: + self._client().head_object(Bucket=self.bucket, Key=probe_key) + except ClientError as exc: + status = self._http_status(exc) + if status == _HTTP_NOT_FOUND: + pass # authenticated; the probe object is simply absent — store is usable + elif status in _AUTH_REJECTED_STATUSES: + raise NativeStoreUnavailableError( + f"Native store authentication failed (HTTP {status}) for bucket {self.bucket!r} at " + f"{self.endpoint_url}: the credentials were rejected or malformed. Check " + f"REF_NATIVE_STORE_PROFILE, or REF_NATIVE_STORE_ACCESS_KEY_ID / " + f"REF_NATIVE_STORE_SECRET_ACCESS_KEY." + ) from exc + elif status == _HTTP_FORBIDDEN: + raise NativeStoreUnavailableError( + f"Native store access denied (HTTP 403) for bucket {self.bucket!r} at " + f"{self.endpoint_url}: the request was forbidden — the secret key may be wrong, or " + f"the token may lack object read & write on this bucket. Check the credentials and " + f"the token's permissions." 
+ ) from exc + else: + raise NativeStoreUnavailableError( + f"Native store preflight failed (HTTP {status}) for bucket {self.bucket!r} at " + f"{self.endpoint_url}: {exc}" + ) from exc + logger.info(f"Native store authenticated: bucket {self.bucket!r} at {self.endpoint_url}") class _NativeStoreConfigProtocol(Protocol): @@ -350,6 +666,9 @@ class _NativeStoreConfigProtocol(Protocol): This keeps ``climate_ref_core`` free of any import dependency on ``climate_ref``. + ``s3_endpoint_url`` and ``bucket`` are non-secret routing config consumed only by the + writable (R2) backend. Write credentials are intentionally **not** part of this protocol + — they are read from the environment at client-build time, never from the config object. """ @property @@ -358,6 +677,12 @@ def url(self) -> str: ... @property def cache_dir(self) -> Path: ... + @property + def s3_endpoint_url(self) -> str: ... + + @property + def bucket(self) -> str: ... + def build_native_store(config: _NativeStoreConfigProtocol, *, writable: bool) -> NativeStore: """ @@ -369,24 +694,33 @@ def build_native_store(config: _NativeStoreConfigProtocol, *, writable: bool) -> With ``writable=False`` the returned store is always anonymous and credential-free (suitable for CI read/replay paths). - With ``writable=True`` and a local URL/path a :class:`LocalFilesystemStore` - or with a remote URL a :class:`R2WriteStore` is attempted - (currently deferred — raises :class:`NotImplementedError`). + With ``writable=True`` and a local URL/path a :class:`LocalFilesystemStore` is returned; + with a remote (``http(s)``) URL a credentialed :class:`R2WriteStore` is returned. The S3 + endpoint and bucket come from the config; authentication is read from the environment + (``REF_NATIVE_STORE_ACCESS_KEY_ID`` / ``REF_NATIVE_STORE_SECRET_ACCESS_KEY``, else + ``REF_NATIVE_STORE_PROFILE``, else boto3's default chain), so secrets never live in the + persisted config. 
Parameters ---------- config - A config object providing ``url`` and ``cache_dir``. + A config object providing ``url``, ``cache_dir``, ``s3_endpoint_url`` and ``bucket``. Typically ``app_config.native_store``. writable When ``False``, return a read-only store (no credentials required). When ``True``, return a writable store (``LocalFilesystemStore`` for local - paths, or a :class:`R2WriteStore` for remote URLs — deferred). + paths, or a :class:`R2WriteStore` for remote URLs). Returns ------- : A :class:`NativeStore` implementation appropriate for the configuration. + + Raises + ------ + ValueError + If the URL scheme is unrecognised, or a writable remote store is requested + without an S3 endpoint / bucket configured. """ url: str = config.url cache_dir: Path = config.cache_dir @@ -396,8 +730,13 @@ def build_native_store(config: _NativeStoreConfigProtocol, *, writable: bool) -> if scheme in ("http", "https"): if writable: - # TODO: Construction raises NotImplementedError until the follow-up lands. 
- return R2WriteStore() + return R2WriteStore( + endpoint_url=config.s3_endpoint_url, + bucket=config.bucket, + access_key_id=os.environ.get("REF_NATIVE_STORE_ACCESS_KEY_ID", ""), + secret_access_key=os.environ.get("REF_NATIVE_STORE_SECRET_ACCESS_KEY", ""), + profile=os.environ.get("REF_NATIVE_STORE_PROFILE", ""), + ) return PoochReadStore(base_url=url.rstrip("/"), cache_dir=cache_dir) if scheme == "file": diff --git a/packages/climate-ref-core/src/climate_ref_core/testing.py b/packages/climate-ref-core/src/climate_ref_core/testing.py index a2f60c9bd..78008c26e 100644 --- a/packages/climate-ref-core/src/climate_ref_core/testing.py +++ b/packages/climate-ref-core/src/climate_ref_core/testing.py @@ -409,50 +409,19 @@ def _sanitize_for_yaml(value: Any) -> Any: return value -def save_datasets_to_yaml( +def _serialise_datasets( datasets: ExecutionDatasetCollection, - path: Path, - *, - force: bool = False, -) -> bool: +) -> tuple[dict[str, Any], dict[str, str]]: """ - Save ExecutionDatasetCollection to a YAML file. - - Paths are saved to a separate `.paths.yaml` file to allow the main - catalog to be version-controlled while paths remain user-specific. - - Multi-file datasets (e.g., time-chunked data) are stored as multiple rows, - one per file. Paths are keyed by `{instance_id}::{filename}` to support - multiple files per dataset. - - By default, the catalog is only written if the content has changed - (detected via hash comparison). Use `force=True` to always write. - - Parameters - ---------- - datasets - The datasets to save - path - Path to write the YAML file - force - If True, always write the catalog even if unchanged + Build the version-controlled catalog payload and its separate local-paths map. 
- Returns - ------- - : - True if the catalog was written, False if skipped (unchanged) + Returns a ``(data, paths_map)`` tuple: ``data`` is the catalog content (metadata plus + per-source records with local paths stripped out), and ``paths_map`` maps + ``{instance_id}::{filename}`` to each local file path. The two are written to + ``catalog.yaml`` and the gitignored ``catalog.paths.yaml`` respectively. """ - # Compute the hash first to check if we need to write - new_hash = datasets.hash - - if not force: - existing_hash = get_catalog_hash(path) - if existing_hash == new_hash: - logger.info(f"Catalog unchanged, skipping write: {path}") - return False - data: dict[str, Any] = { - "_metadata": {"hash": new_hash}, + "_metadata": {"hash": datasets.hash}, } paths_map: dict[str, str] = {} @@ -485,14 +454,76 @@ def save_datasets_to_yaml( "datasets": filtered_records, } - path.parent.mkdir(parents=True, exist_ok=True) + return data, paths_map - with open(path, "w") as f: - yaml.dump(data, f, default_flow_style=False, sort_keys=False) - paths_file = _get_paths_file(path) +def _write_paths_file(paths_file: Path, paths_map: dict[str, str]) -> None: + """Write the gitignored local-paths sidecar (``catalog.paths.yaml``).""" + paths_file.parent.mkdir(parents=True, exist_ok=True) with open(paths_file, "w") as f: yaml.dump(paths_map, f, default_flow_style=False, sort_keys=False) + + +def save_datasets_to_yaml( + datasets: ExecutionDatasetCollection, + path: Path, + *, + force: bool = False, +) -> bool: + """ + Save ExecutionDatasetCollection to a YAML file. + + Paths are saved to a separate `.paths.yaml` file to allow the main + catalog to be version-controlled while paths remain user-specific. + + Multi-file datasets (e.g., time-chunked data) are stored as multiple rows, + one per file. Paths are keyed by `{instance_id}::{filename}` to support + multiple files per dataset. + + By default, the catalog is only written if the content has changed + (detected via hash comparison). 
Use `force=True` to always write. + + The gitignored `.paths.yaml` sidecar is (re)generated whenever it is **missing**, even + when the catalog content is unchanged: on a fresh checkout the version-controlled + `catalog.yaml` exists but the local paths file does not, and `run`/`mint` need it to + resolve inputs. In that case the catalog itself is left untouched (so a plain + `ref test-cases fetch` is enough — no `--force` required) and only the paths file is written. + + Parameters + ---------- + datasets + The datasets to save + path + Path to write the YAML file + force + If True, always write the catalog even if unchanged + + Returns + ------- + : + True if the catalog was (re)written, False if the catalog was left unchanged + (the paths sidecar may still have been regenerated). + """ + new_hash = datasets.hash + paths_file = _get_paths_file(path) + + if not force and get_catalog_hash(path) == new_hash: + # Catalog content is unchanged. Still regenerate the gitignored paths sidecar if it + # is missing (e.g. a fresh checkout) so run/mint can resolve inputs, but leave the + # version-controlled catalog untouched to avoid spurious diffs. 
+ if paths_file.exists(): + logger.info(f"Catalog unchanged, skipping write: {path}") + else: + _, paths_map = _serialise_datasets(datasets) + _write_paths_file(paths_file, paths_map) + logger.info(f"Catalog unchanged; regenerated missing paths file: {paths_file}") + return False + + data, paths_map = _serialise_datasets(datasets) + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump(data, f, default_flow_style=False, sort_keys=False) + _write_paths_file(paths_file, paths_map) logger.info(f"Saved catalog to {path} (paths: {paths_file})") return True diff --git a/packages/climate-ref-core/tests/unit/regression/test_store.py b/packages/climate-ref-core/tests/unit/regression/test_store.py index fa4625fba..c0f22a71c 100644 --- a/packages/climate-ref-core/tests/unit/regression/test_store.py +++ b/packages/climate-ref-core/tests/unit/regression/test_store.py @@ -2,11 +2,13 @@ from pathlib import Path import pytest +from botocore.exceptions import ClientError from pytest_mock import MockerFixture from climate_ref_core.regression.store import ( LocalFilesystemStore, NativeStore, + NativeStoreUnavailableError, PoochReadStore, R2WriteStore, build_native_store, @@ -132,9 +134,7 @@ def test_fetch_missing_raises_file_not_found( "0" * 65, # too long ], ) - def test_blob_path_rejects_bad_digest( - self, local_store: LocalFilesystemStore, bad_digest: str - ) -> None: + def test_blob_path_rejects_bad_digest(self, local_store: LocalFilesystemStore, bad_digest: str) -> None: with pytest.raises(ValueError, match="Invalid sha256 digest"): local_store.has(bad_digest) @@ -173,6 +173,23 @@ def test_two_level_layout( expected = local_store.root / blob_digest[:2] / blob_digest assert expected.exists() + def test_preflight_creates_and_accepts_root(self, tmp_path: Path) -> None: + store = LocalFilesystemStore(root=tmp_path / "new-store") + assert not store.root.exists() + store.preflight() # must not raise; creates the root + assert store.root.is_dir() + + 
def test_preflight_raises_when_not_writable(self, tmp_path: Path) -> None: + root = tmp_path / "ro-store" + root.mkdir() + root.chmod(0o500) # read+execute, not writable + try: + store = LocalFilesystemStore(root=root) + with pytest.raises(NativeStoreUnavailableError, match="not writable"): + store.preflight() + finally: + root.chmod(0o700) # restore so tmp cleanup can remove it + class TestPoochReadStore: def test_satisfies_protocol(self, tmp_path: Path) -> None: @@ -223,18 +240,20 @@ def test_fetch_hash_verified( store.fetch(blob_digest, dest) -class TestR2WriteStore: - def test_construction_raises_not_implemented(self) -> None: - with pytest.raises(NotImplementedError, match="deferred to a follow-up PR"): - R2WriteStore() - - class _StubConfig: """Minimal config double satisfying _NativeStoreConfigProtocol.""" - def __init__(self, url: str, cache_dir: Path) -> None: + def __init__( + self, + url: str, + cache_dir: Path, + s3_endpoint_url: str = "https://account.r2.cloudflarestorage.com", + bucket: str = "ref-baselines", + ) -> None: self._url = url self._cache_dir = cache_dir + self._s3_endpoint_url = s3_endpoint_url + self._bucket = bucket @property def url(self) -> str: @@ -244,6 +263,14 @@ def url(self) -> str: def cache_dir(self) -> Path: return self._cache_dir + @property + def s3_endpoint_url(self) -> str: + return self._s3_endpoint_url + + @property + def bucket(self) -> str: + return self._bucket + class TestBuildNativeStore: def test_writable_false_local_path_returns_local_store(self, tmp_path: Path) -> None: @@ -298,9 +325,65 @@ def test_file_url_with_host_raises_value_error(self, tmp_path: Path) -> None: with pytest.raises(ValueError, match="host component"): build_native_store(cfg, writable=False) - def test_writable_true_remote_url_raises_not_implemented(self, tmp_path: Path) -> None: + def test_writable_true_remote_url_returns_r2_store(self, tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("REF_NATIVE_STORE_ACCESS_KEY_ID", "akid") + 
monkeypatch.setenv("REF_NATIVE_STORE_SECRET_ACCESS_KEY", "secret") + monkeypatch.delenv("REF_NATIVE_STORE_PROFILE", raising=False) + cfg = _StubConfig( + url="https://baselines.example.com", + cache_dir=tmp_path / "cache", + s3_endpoint_url="https://account.r2.cloudflarestorage.com", + bucket="ref-baselines", + ) + store = build_native_store(cfg, writable=True) + assert isinstance(store, R2WriteStore) + assert store.endpoint_url == "https://account.r2.cloudflarestorage.com" + assert store.bucket == "ref-baselines" + assert store.access_key_id == "akid" + assert store.secret_access_key == "secret" # noqa: S105 - test fixture value, not a real secret + assert store.profile == "" + + def test_writable_true_remote_reads_creds_from_env_not_config(self, tmp_path: Path, monkeypatch) -> None: + # Credentials must come from the environment, never from the (serialisable) config. + monkeypatch.delenv("REF_NATIVE_STORE_ACCESS_KEY_ID", raising=False) + monkeypatch.delenv("REF_NATIVE_STORE_SECRET_ACCESS_KEY", raising=False) + monkeypatch.delenv("REF_NATIVE_STORE_PROFILE", raising=False) + cfg = _StubConfig(url="https://baselines.example.com", cache_dir=tmp_path / "cache") + store = build_native_store(cfg, writable=True) + assert isinstance(store, R2WriteStore) + # Empty creds + empty profile → boto3 default credential chain is used at client-build time. + assert store.access_key_id == "" + assert store.secret_access_key == "" + assert store.profile == "" + + def test_writable_true_remote_reads_profile_from_env(self, tmp_path: Path, monkeypatch) -> None: + # A named profile authenticates without putting secrets in the config or env creds. 
+ monkeypatch.delenv("REF_NATIVE_STORE_ACCESS_KEY_ID", raising=False) + monkeypatch.delenv("REF_NATIVE_STORE_SECRET_ACCESS_KEY", raising=False) + monkeypatch.setenv("REF_NATIVE_STORE_PROFILE", "cf-ref") cfg = _StubConfig(url="https://baselines.example.com", cache_dir=tmp_path / "cache") - with pytest.raises(NotImplementedError, match="deferred to a follow-up PR"): + store = build_native_store(cfg, writable=True) + assert isinstance(store, R2WriteStore) + assert store.profile == "cf-ref" + assert store.access_key_id == "" + assert store.secret_access_key == "" + + def test_writable_true_remote_without_endpoint_raises(self, tmp_path: Path) -> None: + cfg = _StubConfig( + url="https://baselines.example.com", + cache_dir=tmp_path / "cache", + s3_endpoint_url="", + ) + with pytest.raises(ValueError, match="S3 endpoint URL"): + build_native_store(cfg, writable=True) + + def test_writable_true_remote_without_bucket_raises(self, tmp_path: Path) -> None: + cfg = _StubConfig( + url="https://baselines.example.com", + cache_dir=tmp_path / "cache", + bucket="", + ) + with pytest.raises(ValueError, match="bucket name"): build_native_store(cfg, writable=True) def test_unsupported_scheme_raises_value_error(self, tmp_path: Path) -> None: @@ -309,3 +392,169 @@ def test_unsupported_scheme_raises_value_error(self, tmp_path: Path) -> None: cfg = _StubConfig(url=url, cache_dir=tmp_path / "cache") with pytest.raises(ValueError, match="not recognised"): build_native_store(cfg, writable=False) + + +def _client_error(code: str, status: int, operation: str = "HeadObject") -> ClientError: + """Build a botocore ``ClientError`` with the given S3 error code / HTTP status.""" + return ClientError( + {"Error": {"Code": code, "Message": code}, "ResponseMetadata": {"HTTPStatusCode": status}}, + operation, + ) + + +class TestR2WriteStore: + """Behaviour of the credentialed R2 write backend with a mocked boto3 client. 
+ + The boto3 client is replaced by patching :func:`_s3_client`, so these tests neither + touch the network nor exercise the ``@cache`` on the real factory. + """ + + def _store(self, mocker: MockerFixture, client, **kwargs) -> R2WriteStore: + mocker.patch("climate_ref_core.regression.store._s3_client", return_value=client) + params = { + "endpoint_url": "https://account.r2.cloudflarestorage.com", + "bucket": "ref-baselines", + "access_key_id": "akid", + "secret_access_key": "secret", + } + params.update(kwargs) + return R2WriteStore(**params) + + def test_construct_requires_endpoint(self) -> None: + with pytest.raises(ValueError, match="S3 endpoint URL"): + R2WriteStore(endpoint_url="", bucket="b") + + def test_construct_requires_bucket(self) -> None: + with pytest.raises(ValueError, match="bucket name"): + R2WriteStore(endpoint_url="https://x", bucket="") + + def test_client_threads_profile_to_factory(self, mocker: MockerFixture) -> None: + factory = mocker.patch( + "climate_ref_core.regression.store._s3_client", return_value=mocker.MagicMock() + ) + store = R2WriteStore( + endpoint_url="https://account.r2.cloudflarestorage.com", + bucket="ref-baselines", + profile="cf-ref", + ) + store.has("a" * 64) + factory.assert_called_once_with("https://account.r2.cloudflarestorage.com", "", "", "cf-ref") + + def test_key_validates_digest(self, mocker: MockerFixture) -> None: + store = self._store(mocker, mocker.MagicMock()) + with pytest.raises(ValueError): + store.has("not-a-valid-digest") + + def test_key_uses_flat_layout_with_prefix(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + store = self._store(mocker, client, key_prefix="native/") + digest = "a" * 64 + store.has(digest) + client.head_object.assert_called_once_with(Bucket="ref-baselines", Key=f"native/{digest}") + + def test_put_uploads_when_absent(self, mocker: MockerFixture, tmp_path: Path) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("404", 404) + 
store = self._store(mocker, client) + blob = tmp_path / "blob.nc" + blob.write_bytes(b"hello") + digest = hashlib.sha256(b"hello").hexdigest() + + assert store.put(blob) == digest + client.upload_file.assert_called_once_with(str(blob), "ref-baselines", digest) + + def test_put_is_idempotent_when_present(self, mocker: MockerFixture, tmp_path: Path) -> None: + client = mocker.MagicMock() # head_object succeeds → blob already present + store = self._store(mocker, client) + blob = tmp_path / "blob.nc" + blob.write_bytes(b"hello") + digest = hashlib.sha256(b"hello").hexdigest() + + assert store.put(blob) == digest + client.upload_file.assert_not_called() + + def test_has_returns_true_when_present(self, mocker: MockerFixture) -> None: + store = self._store(mocker, mocker.MagicMock()) + assert store.has("b" * 64) is True + + def test_has_returns_false_on_404(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("404", 404) + store = self._store(mocker, client) + assert store.has("b" * 64) is False + + def test_has_reraises_non_404(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("AccessDenied", 403) + store = self._store(mocker, client) + with pytest.raises(ClientError): + store.has("c" * 64) + + def test_fetch_writes_and_verifies(self, mocker: MockerFixture, tmp_path: Path) -> None: + content = b"native-bytes" + digest = hashlib.sha256(content).hexdigest() + client = mocker.MagicMock() + client.download_file.side_effect = lambda bucket, key, dest: Path(dest).write_bytes(content) + store = self._store(mocker, client) + + dest = tmp_path / "nested" / "blob.nc" + store.fetch(digest, dest) + assert dest.read_bytes() == content + + def test_fetch_hash_mismatch_raises(self, mocker: MockerFixture, tmp_path: Path) -> None: + digest = hashlib.sha256(b"expected").hexdigest() + client = mocker.MagicMock() + client.download_file.side_effect = lambda 
bucket, key, dest: Path(dest).write_bytes(b"different") + store = self._store(mocker, client) + with pytest.raises(ValueError): + store.fetch(digest, tmp_path / "blob.nc") + + def test_fetch_missing_raises_filenotfound(self, mocker: MockerFixture, tmp_path: Path) -> None: + client = mocker.MagicMock() + client.download_file.side_effect = _client_error("404", 404, "GetObject") + store = self._store(mocker, client) + with pytest.raises(FileNotFoundError): + store.fetch("d" * 64, tmp_path / "blob.nc") + + def test_preflight_ok_on_404(self, mocker: MockerFixture) -> None: + # 404 on the sentinel HEAD = authenticated, object simply absent -> usable. + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("404", 404) + store = self._store(mocker, client) + store.preflight() # must not raise + # probe is a HEAD on a sentinel key, never a real digest + _, kwargs = client.head_object.call_args + assert kwargs["Key"].endswith(".ref-preflight-probe") + + def test_preflight_ok_on_200(self, mocker: MockerFixture) -> None: + store = self._store(mocker, mocker.MagicMock()) # head_object succeeds + store.preflight() # must not raise + + def test_preflight_401_raises_actionable(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("Unauthorized", 401) + store = self._store(mocker, client) + with pytest.raises(NativeStoreUnavailableError, match="REF_NATIVE_STORE_PROFILE"): + store.preflight() + + def test_preflight_400_treated_as_bad_credentials(self, mocker: MockerFixture) -> None: + # A malformed access key id makes R2 return 400 on the HEAD; treat it as a creds problem. 
+ client = mocker.MagicMock() + client.head_object.side_effect = _client_error("BadRequest", 400) + store = self._store(mocker, client) + with pytest.raises(NativeStoreUnavailableError, match="credentials were rejected or malformed"): + store.preflight() + + def test_preflight_403_raises_actionable(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("AccessDenied", 403) + store = self._store(mocker, client) + with pytest.raises(NativeStoreUnavailableError, match="403"): + store.preflight() + + def test_preflight_other_error_raises(self, mocker: MockerFixture) -> None: + client = mocker.MagicMock() + client.head_object.side_effect = _client_error("InternalError", 500) + store = self._store(mocker, client) + with pytest.raises(NativeStoreUnavailableError, match="preflight failed"): + store.preflight() diff --git a/packages/climate-ref-core/tests/unit/test_testing.py b/packages/climate-ref-core/tests/unit/test_testing.py index 63c60d9e8..414ce76ba 100644 --- a/packages/climate-ref-core/tests/unit/test_testing.py +++ b/packages/climate-ref-core/tests/unit/test_testing.py @@ -270,6 +270,34 @@ def test_save_creates_parent_dirs(self, tmp_path): assert yaml_path.exists() assert yaml_path.parent.exists() + def test_regenerates_missing_paths_when_catalog_unchanged(self, tmp_path): + """A fresh checkout has the committed catalog.yaml but not the gitignored paths file. + + Re-saving the same (unchanged) datasets must regenerate the missing paths file without + rewriting the version-controlled catalog — so a plain ``fetch`` (no ``--force``) is enough. 
+ """ + df = pd.DataFrame({"instance_id": ["CMIP6.test.ds"], "path": ["/path/to/file.nc"]}) + collection = DatasetCollection(datasets=df, slug_column="instance_id", selector=()) + datasets = ExecutionDatasetCollection({SourceDatasetType.CMIP6: collection}) + + yaml_path = tmp_path / "catalog.yaml" + paths_file = yaml_path.with_suffix(".paths.yaml") + assert save_datasets_to_yaml(datasets, yaml_path) is True + assert paths_file.exists() + + # Simulate a fresh checkout: committed catalog present, gitignored paths file absent. + catalog_bytes_before = yaml_path.read_bytes() + paths_file.unlink() + + # Unchanged catalog → returns False (catalog not rewritten) but the paths file is restored. + assert save_datasets_to_yaml(datasets, yaml_path) is False + assert paths_file.exists() + # The version-controlled catalog must be byte-identical (no spurious diff). + assert yaml_path.read_bytes() == catalog_bytes_before + # And the regenerated paths must round-trip. + loaded = load_datasets_from_yaml(yaml_path) + assert loaded[SourceDatasetType.CMIP6].datasets["path"].tolist() == ["/path/to/file.nc"] + def test_load_with_selector(self, tmp_path): """Test loading YAML with selector information.""" yaml_content = """ diff --git a/packages/climate-ref/src/climate_ref/cli/__init__.py b/packages/climate-ref/src/climate_ref/cli/__init__.py index e64fe2f37..35e25235a 100644 --- a/packages/climate-ref/src/climate_ref/cli/__init__.py +++ b/packages/climate-ref/src/climate_ref/cli/__init__.py @@ -43,6 +43,7 @@ ("test-cases", "replay"), ("test-cases", "mint"), ("test-cases", "ci-gate"), + ("test-cases", "check-store"), } diff --git a/packages/climate-ref/src/climate_ref/cli/test_cases.py b/packages/climate-ref/src/climate_ref/cli/test_cases.py index 61a43680f..ddbe37fb4 100644 --- a/packages/climate-ref/src/climate_ref/cli/test_cases.py +++ b/packages/climate-ref/src/climate_ref/cli/test_cases.py @@ -1152,6 +1152,10 @@ def mint_native( # noqa: PLR0912, PLR0915 bool, 
typer.Option(help="Increment test_case_version when authoring the manifest"), ] = False, + dry_run: Annotated[ + bool, + typer.Option(help="Preflight the store and list what would be minted, without running or uploading"), + ] = False, ) -> None: """ Mint canonical native baselines @@ -1165,6 +1169,7 @@ def mint_native( # noqa: PLR0912, PLR0915 -------- ref test-cases mint --provider example ref test-cases mint --provider example --bump-version + ref test-cases mint --provider example --dry-run """ import tempfile @@ -1172,7 +1177,7 @@ def mint_native( # noqa: PLR0912, PLR0915 from climate_ref.testing import TestCaseRunner from climate_ref_core.regression.capture import capture_execution from climate_ref_core.regression.manifest import Manifest - from climate_ref_core.regression.store import build_native_store + from climate_ref_core.regression.store import NativeStoreUnavailableError, build_native_store from climate_ref_core.testing import TestCasePaths, load_datasets_from_yaml config: Config = ctx.obj.config @@ -1181,20 +1186,39 @@ def mint_native( # noqa: PLR0912, PLR0915 try: store = build_native_store(config.native_store, writable=True) - except NotImplementedError as exc: + except (NotImplementedError, ValueError) as exc: logger.error( - "Cannot mint: no writable native store is configured. " - "Set REF_NATIVE_STORE_URL to a writable location\n" - f"(a local file:// path for development): {exc}" + "Cannot mint: no writable native store is configured. For the remote (R2) store set " + "REF_NATIVE_STORE_S3_ENDPOINT_URL and REF_NATIVE_STORE_BUCKET, and authenticate via " + "REF_NATIVE_STORE_ACCESS_KEY_ID / REF_NATIVE_STORE_SECRET_ACCESS_KEY or a named " + "REF_NATIVE_STORE_PROFILE; or set REF_NATIVE_STORE_URL to a local file:// path for " + f"development: {exc}" ) raise typer.Exit(code=1) from exc + # Preflight the store (credentials / bucket reachability) before running any diagnostics, + # so a misconfiguration fails fast instead of after the (slow) execution. 
+ try: + store.preflight() + except NativeStoreUnavailableError as exc: + logger.error(f"Cannot mint: {exc}") + raise typer.Exit(code=1) from exc + registry = ProviderRegistry.build_from_config(config, db) cases = list(_iter_test_cases(registry, provider=provider, diagnostic=diagnostic, test_case=test_case)) if not cases: logger.warning(f"No test cases found for provider {provider!r}") raise typer.Exit(code=0) + if dry_run: + # The store preflight has already passed at this point; report scope and stop before + # running any diagnostics or uploading anything. + console.print(f"[cyan]Dry run — would mint {len(cases)} test case(s):[/cyan]") + for diag, tc in cases: + console.print(f" - {provider}/{diag.slug}/{tc.name}") + console.print("[cyan]Store preflight passed; nothing was run or uploaded.[/cyan]") + return + minted = 0 failures: list[str] = [] @@ -1288,6 +1312,48 @@ def mint_native( # noqa: PLR0912, PLR0915 console.print(f"[green]Minted {minted} native baseline(s)[/green]") +@app.command(name="check-store") +def check_store( + ctx: typer.Context, +) -> None: + """ + Check connectivity and credentials for the writable native baseline store. + + Builds the writable store from the configuration and preflights it (an authenticated + no-op probe) without running any diagnostics or uploading anything. Use this to confirm a + mint will work — that the credentials (REF_NATIVE_STORE_PROFILE or the access-key env + vars) and the bucket are correct — before a slow mint run. + + Examples + -------- + ref test-cases check-store + REF_NATIVE_STORE_PROFILE=my-profile ref test-cases check-store + """ + from climate_ref_core.regression.store import NativeStoreUnavailableError, build_native_store + + config: Config = ctx.obj.config + console: Console = ctx.obj.console + + try: + store = build_native_store(config.native_store, writable=True) + except (NotImplementedError, ValueError) as exc: + logger.error( + "Native store is not configured for writing. 
For the remote (R2) store set " + "REF_NATIVE_STORE_S3_ENDPOINT_URL and REF_NATIVE_STORE_BUCKET, and authenticate via " + "REF_NATIVE_STORE_ACCESS_KEY_ID / REF_NATIVE_STORE_SECRET_ACCESS_KEY or a named " + f"REF_NATIVE_STORE_PROFILE; or set REF_NATIVE_STORE_URL to a local file:// path: {exc}" + ) + raise typer.Exit(code=1) from exc + + try: + store.preflight() + except NativeStoreUnavailableError as exc: + logger.error(str(exc)) + raise typer.Exit(code=1) from exc + + console.print("[green]Native store OK:[/green] credentials accepted and the store is reachable.") + + def _provider_source_root(diag: Diagnostic, repo_root: Path) -> str | None: """ Return the diagnostic's provider package source directory, relative to the repo root. diff --git a/packages/climate-ref/src/climate_ref/config.py b/packages/climate-ref/src/climate_ref/config.py index 19a06ecb4..92ab28a85 100644 --- a/packages/climate-ref/src/climate_ref/config.py +++ b/packages/climate-ref/src/climate_ref/config.py @@ -181,13 +181,31 @@ class NativeStoreConfig: for offline development and testing. """ - credentials: str = env_field(name="NATIVE_STORE_CREDENTIALS", default="") + s3_endpoint_url: str = env_field( + name="NATIVE_STORE_S3_ENDPOINT_URL", + default="https://2aa5172b2bba093c516027d6fa13cdc8.r2.cloudflarestorage.com", + ) + """ + S3 API endpoint for the writable (Cloudflare R2) backend, without the bucket. + + Non-secret routing config, consumed only by the ``mint`` verb. Defaults to the + production Climate-REF R2 account endpoint (default jurisdiction — note there is no + ``.eu`` in the host). Anonymous read (``fetch`` / ``has``) uses :attr:`url` instead and + never touches this. + Set ``REF_NATIVE_STORE_S3_ENDPOINT_URL`` to override (e.g. a staging account). """ - Credentials for write access to the native-bundle object store. - Only consumed by the ``mint`` verb (object-store upload). - Anonymous read (``fetch`` / ``has``) never requires credentials. 
- Set ``REF_NATIVE_STORE_CREDENTIALS`` to the appropriate token or key material. + bucket: str = env_field(name="NATIVE_STORE_BUCKET", default="ref-baselines-public") + """ + Name of the writable (Cloudflare R2) bucket. + + Non-secret routing config, consumed only by the ``mint`` verb. + Set ``REF_NATIVE_STORE_BUCKET`` to override. + + Write credentials are **not** stored here: the access-key id and secret-access-key are + read from ``REF_NATIVE_STORE_ACCESS_KEY_ID`` / ``REF_NATIVE_STORE_SECRET_ACCESS_KEY`` + (falling back to boto3's default credential chain) at upload time only, so secrets never + land in a serialised config. """ cache_dir: Path = env_field(name="NATIVE_STORE_CACHE_DIR", converter=Path) diff --git a/packages/climate-ref/tests/unit/cli/test_test_cases.py b/packages/climate-ref/tests/unit/cli/test_test_cases.py index 6055f5e15..44d969b3d 100644 --- a/packages/climate-ref/tests/unit/cli/test_test_cases.py +++ b/packages/climate-ref/tests/unit/cli/test_test_cases.py @@ -1569,6 +1569,55 @@ def test_mint_refuses_without_writable_store(self, invoke_cli, mocker, tmp_path) ) assert "Cannot mint" in result.stderr + def test_mint_fails_fast_on_preflight_error(self, invoke_cli, mocker): + """A store auth/connectivity failure must abort before any diagnostic runs.""" + from climate_ref_core.regression.store import NativeStoreUnavailableError + + registry, _diag, _tc = _make_case_mocks() + mocker.patch( + "climate_ref.provider_registry.ProviderRegistry.build_from_config", + return_value=registry, + ) + store = MagicMock() + store.preflight.side_effect = NativeStoreUnavailableError( + "Native store authentication failed (HTTP 401) for bucket 'ref-baselines-public'" + ) + mocker.patch( + "climate_ref_core.regression.store.build_native_store", + return_value=store, + ) + + result = invoke_cli( + ["test-cases", "mint", "--provider", "example"], + expected_exit_code=1, + ) + assert "Cannot mint" in result.stderr + assert "401" in result.stderr + 
store.preflight.assert_called_once() + # Fail-fast: the diagnostic runner must never have been reached. + store.put.assert_not_called() + + def test_mint_dry_run_lists_without_running(self, invoke_cli, mocker): + """--dry-run preflights + lists the cases, but runs no diagnostics and uploads nothing.""" + registry, _diag, _tc = _make_case_mocks() + mocker.patch( + "climate_ref.provider_registry.ProviderRegistry.build_from_config", + return_value=registry, + ) + store = MagicMock() # preflight passes (no side effect) + mocker.patch( + "climate_ref_core.regression.store.build_native_store", + return_value=store, + ) + runner_cls = mocker.patch("climate_ref.testing.TestCaseRunner") + + result = invoke_cli(["test-cases", "mint", "--provider", "example", "--dry-run"]) + assert "Dry run" in result.stdout + assert "example/test-diag/default" in result.stdout + store.preflight.assert_called_once() + store.put.assert_not_called() + runner_cls.assert_not_called() + def test_mint_writes_blobs_and_manifest(self, invoke_cli, mocker, tmp_path): from climate_ref_core.regression.manifest import Manifest from climate_ref_core.regression.store import LocalFilesystemStore @@ -1628,6 +1677,46 @@ def test_mint_writes_blobs_and_manifest(self, invoke_cli, mocker, tmp_path): assert store.has(entry.sha256) +class TestCheckStoreCommand: + """Tests for the ``ref test-cases check-store`` command.""" + + def test_check_store_help(self, invoke_cli): + result = invoke_cli(["test-cases", "check-store", "--help"]) + assert "writable native baseline store" in result.stdout + + def test_check_store_ok(self, invoke_cli, mocker): + store = MagicMock() # preflight passes (no side effect) + mocker.patch( + "climate_ref_core.regression.store.build_native_store", + return_value=store, + ) + result = invoke_cli(["test-cases", "check-store"]) + assert "Native store OK" in result.stdout + store.preflight.assert_called_once() + + def test_check_store_reports_auth_failure(self, invoke_cli, mocker): + from 
climate_ref_core.regression.store import NativeStoreUnavailableError + + store = MagicMock() + store.preflight.side_effect = NativeStoreUnavailableError( + "Native store authentication failed (HTTP 401) for bucket 'ref-baselines-public'" + ) + mocker.patch( + "climate_ref_core.regression.store.build_native_store", + return_value=store, + ) + result = invoke_cli(["test-cases", "check-store"], expected_exit_code=1) + assert "401" in result.stderr + + def test_check_store_reports_unconfigured(self, invoke_cli, mocker): + mocker.patch( + "climate_ref_core.regression.store.build_native_store", + side_effect=NotImplementedError("R2 backend deferred"), + ) + result = invoke_cli(["test-cases", "check-store"], expected_exit_code=1) + assert "not configured" in result.stderr + + class TestCIGateCommand: """Tests for the ``ref test-cases ci-gate`` command.""" diff --git a/packages/climate-ref/tests/unit/test_config.py b/packages/climate-ref/tests/unit/test_config.py index 9bbe80daa..83b5e8152 100644 --- a/packages/climate-ref/tests/unit/test_config.py +++ b/packages/climate-ref/tests/unit/test_config.py @@ -200,7 +200,8 @@ def test_defaults(self, monkeypatch, mocker): "executor": {"executor": "climate_ref.executor.LocalExecutor", "config": {}}, "native_store": { "url": "https://baselines.climate-ref.org", - "credentials": "", + "s3_endpoint_url": "https://2aa5172b2bba093c516027d6fa13cdc8.r2.cloudflarestorage.com", + "bucket": "ref-baselines-public", "cache_dir": str(resolve_cache_dir("native-baselines")), }, "paths": { diff --git a/pyproject.toml b/pyproject.toml index 564c3af21..aa8e1229e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,9 @@ dev = [ "types-tqdm>=4.67.3.20260408", "pytest-xdist>=3.6.1", "types-pyyaml>=6.0.12.20260408", + # Native baseline store write backend (R2) — exercised by tests and the mint path + "boto3>=1.34", + "boto3-stubs[s3]>=1.34", ] [tool.uv] diff --git a/uv.lock b/uv.lock index aa28e9084..df34c13d5 100644 --- a/uv.lock +++ 
b/uv.lock @@ -265,6 +265,65 @@ css = [ { name = "tinycss2" }, ] +[[package]] +name = "boto3" +version = "1.43.30" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/47/2db3e7c1317019d800a1b4181059656842b8aec69ad578ac01a73eba3b89/boto3-1.43.30.tar.gz", hash = "sha256:6b1ee360f363a457f67a8f5702f522043d8a32d67a97c362ad12075d8b5b531e", size = 113154, upload-time = "2026-06-15T20:32:58.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/61/4031fd64c458ab7e01879e6ac8b42e256e37139df551315beafe04e74b57/boto3-1.43.30-py3-none-any.whl", hash = "sha256:89e982463d94773136ccf69be77cccd54ff1ce351a6aadd1d3437fcb693681b5", size = 140534, upload-time = "2026-06-15T20:32:55.646Z" }, +] + +[[package]] +name = "boto3-stubs" +version = "1.43.30" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore-stubs" }, + { name = "types-s3transfer" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/20/07847fbbeb27edc2c04fe5bc950a448621b1d12db1b263b471406956d685/boto3_stubs-1.43.30.tar.gz", hash = "sha256:5c53e5243e611f157527454e1b9292062c1c3e1d74ab47754585b910a613d7e1", size = 103025, upload-time = "2026-06-15T21:24:00.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/90/43fede497226c914bc7d34d4721b21d06256131bbce3050681077af98fe6/boto3_stubs-1.43.30-py3-none-any.whl", hash = "sha256:f9c35131d86698c1b3748dc3bc086f1f3c18c2cf6540c86a8eab7adb2a8e694d", size = 70828, upload-time = "2026-06-15T21:23:54.957Z" }, +] + +[package.optional-dependencies] +s3 = [ + { name = "mypy-boto3-s3" }, +] + +[[package]] +name = "botocore" +version = "1.43.30" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name 
= "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/dd/6df8586de6cc036eec2e491b1a65c489c43d9722929aef407c20b7323329/botocore-1.43.30.tar.gz", hash = "sha256:19ed560cb35ae43bf010d37da429a553c07063bf7efea0f2cb53be8a78d3e3d5", size = 15520608, upload-time = "2026-06-15T20:32:45.439Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/b0/9343b5007ad24363c63455facfb0859e9765cf245eebffee8233056b9696/botocore-1.43.30-py3-none-any.whl", hash = "sha256:26b1dded84d89b396180916f56900bd2ab1c0d545a66d1d2c3eeb40f772935b2", size = 15205423, upload-time = "2026-06-15T20:32:40.827Z" }, +] + +[[package]] +name = "botocore-stubs" +version = "1.43.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-awscrt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/81/79693e833291c00dc89ee610e5e915381b6f08233912e28df50106840780/botocore_stubs-1.43.14.tar.gz", hash = "sha256:9e3bc1fdd51da7473f0df726c82747a1b0ae913449d629659765c247fecc2039", size = 42738, upload-time = "2026-05-25T06:06:37.484Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/ca/f017727b11895908c5dedc829cf2ec35e0c4b2a26ba875db325fef2cefdf/botocore_stubs-1.43.14-py3-none-any.whl", hash = "sha256:fb98f1475c92fd718644e786b5c543a20f1b1f610e89e0a7191c3f1f429c75aa", size = 67093, upload-time = "2026-05-25T06:06:34.532Z" }, +] + [[package]] name = "bracex" version = "2.6" @@ -779,6 +838,11 @@ dependencies = [ { name = "typing-extensions" }, ] +[package.optional-dependencies] +aws = [ + { name = "boto3" }, +] + [package.dev-dependencies] dev = [ { name = "types-requests" }, @@ -787,6 +851,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "attrs", specifier = ">=23.2.0" }, + { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.34" }, { name = "cattrs", specifier = ">=24.1" }, { name = "environs", specifier = ">=11" }, { name = "fastprogress", specifier = "==1.0.5" }, @@ -803,6 +868,7 @@ requires-dist = 
[ { name = "setuptools", specifier = "<81" }, { name = "typing-extensions" }, ] +provides-extras = ["aws"] [package.metadata.requires-dev] dev = [{ name = "types-requests" }] @@ -910,6 +976,8 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "beautifulsoup4" }, + { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, { name = "bump-my-version" }, { name = "cartopy" }, { name = "celery-types" }, @@ -969,6 +1037,8 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "beautifulsoup4", specifier = ">=4.12,<4.15" }, + { name = "boto3", specifier = ">=1.34" }, + { name = "boto3-stubs", extras = ["s3"], specifier = ">=1.34" }, { name = "bump-my-version", specifier = ">=0.28.1" }, { name = "cartopy", specifier = ">=0.24.1" }, { name = "celery-types", specifier = ">=0.23.0" }, @@ -1946,6 +2016,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "json5" version = "0.13.0" @@ -2892,6 +2971,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/28/9a/f23c163e25b11074188251b0b5a0342625fc1cdb6af604757174fa9acc9b/mypy-1.20.2-py3-none-any.whl", hash = "sha256:a94c5a76ab46c5e6257c7972b6c8cff0574201ca7dc05647e33e795d78680563", size = 2637314, upload-time = "2026-04-21T17:05:54.5Z" }, ] +[[package]] +name = "mypy-boto3-s3" +version = "1.43.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/2c/fc409f9ff5904a02cf4c2c1518c34d20cb56f22b2368b35fd0adda2926f3/mypy_boto3_s3-1.43.14.tar.gz", hash = "sha256:73d54c1d0999c73c403dc9a9a3da4a9722715aba116595af08c0d4675f8bc670", size = 77078, upload-time = "2026-05-22T20:48:28.251Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/61/e8e74a3f4c729719efc8b1d00179bc16c302202eac32a1b56a842a997ed2/mypy_boto3_s3-1.43.14-py3-none-any.whl", hash = "sha256:ce77096d6c5f90020c45e34c83d2268ca2bb17726149ce5033751870f7fb4e97", size = 84277, upload-time = "2026-05-22T20:48:25.032Z" }, +] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -4554,6 +4645,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, ] +[[package]] +name = "s3transfer" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/1f/12417f7f493fc45e1f9fd5d4a9b6c125cf8d2cf3f8ddbdfab3e76406e9d6/s3transfer-0.18.0.tar.gz", hash = "sha256:3760b8b7ec1315da54048b2d626276732bee4300d054d492d4e1d43e20d4ecbd", size = 160560, upload-time = "2026-05-28T19:39:09.124Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2b/58/a58fc997655386daa2e25784e30c288aa3e3819e401f77029ee4899fb55a/s3transfer-0.18.0-py3-none-any.whl", hash = "sha256:239c13b09e65ad0346e1be7348b8a202dcad44ac7ea7c6eb858fc881dce739b6", size = 88572, upload-time = "2026-05-28T19:39:07.999Z" }, +] + [[package]] name = "scipy" version = "1.15.3" @@ -5109,6 +5212,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, ] +[[package]] +name = "types-awscrt" +version = "0.34.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/59/44409a8fc06b444ab1a6f71dcb29d49a6e17e02424345eb51b051bebb345/types_awscrt-0.34.1.tar.gz", hash = "sha256:559aa04250f6a419a617dfb788f3e10903aaf74700ef23e521b64a411b83b803", size = 19062, upload-time = "2026-06-05T04:40:10.689Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/b1/214b12162b452ed6acd230065e6c587cde6b96871e3ce6d653f40888f8df/types_awscrt-0.34.1-py3-none-any.whl", hash = "sha256:20c752b6031544d8f694803c35174aee129f1be5ddf886ae46d22f7ffd9b7d75", size = 45688, upload-time = "2026-06-05T04:40:09.198Z" }, +] + [[package]] name = "types-pyyaml" version = "6.0.12.20260408" @@ -5130,6 +5242,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/12/709ea261f2bf91ef0a26a9eed20f2623227a8ed85610c1e54c5805692ecb/types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d", size = 20676, upload-time = "2026-01-07T03:20:52.929Z" }, ] +[[package]] +name = "types-s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/fe/64/42689150509eb3e6e82b33ee3d89045de1592488842ddf23c56957786d05/types_s3transfer-0.16.0.tar.gz", hash = "sha256:b4636472024c5e2b62278c5b759661efeb52a81851cde5f092f24100b1ecb443", size = 13557, upload-time = "2025-12-08T08:13:09.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/27/e88220fe6274eccd3bdf95d9382918716d312f6f6cef6a46332d1ee2feff/types_s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:1c0cd111ecf6e21437cb410f5cddb631bfb2263b77ad973e79b9c6d0cb24e0ef", size = 19247, upload-time = "2025-12-08T08:13:08.426Z" }, +] + [[package]] name = "types-tqdm" version = "4.67.3.20260408" From 360e406f12f347ce183119e0df0f8abcf83e665c Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 17 Jun 2026 14:24:02 +1000 Subject: [PATCH 2/3] chore(example): mint native baselines to the R2 store Mint the example provider's native baselines to the production R2 bucket and author the `native` block in each test case's `manifest.json`. Remove the committed `annual_mean_global_mean_timeseries.nc` (now fetched from the store) and the retired `.catalog_hash` sidecars. 
--- .../cmip7/manifest.json | 20 +++++++++++++++++- .../cmip7/regression/.catalog_hash | 1 - .../default/manifest.json | 20 +++++++++++++++++- .../default/regression/.catalog_hash | 1 - .../annual_mean_global_mean_timeseries.nc | Bin 32253 -> 0 bytes 5 files changed, 38 insertions(+), 4 deletions(-) delete mode 100644 packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/regression/.catalog_hash delete mode 100644 packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/.catalog_hash delete mode 100644 packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/annual_mean_global_mean_timeseries.nc diff --git a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/manifest.json b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/manifest.json index c31213814..70056b526 100644 --- a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/manifest.json +++ b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/manifest.json @@ -1,10 +1,28 @@ { + "catalog_hash": "8371cadebf4fd40b162e6458fffab6ef3ab3d505", "committed": { "diagnostic.json": "5bb5817e3c59a02859dd3cd2d94f3b00e026a25a70e0f22b1e7fc5234ce636f7", "output.json": "f9a2e86a5906875d01f9ee62d3044eefd3d21d9bfcc71be62d4ddcd2fa75fe9d", "series.json": "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945" }, - "native": {}, + "native": { + "annual_mean_global_mean_timeseries.nc": { + "sha256": "68f789157617604344470133d5749a94805934abb8624caa96d80866e28196b9", + "size": 31184 + }, + "diagnostic.json": { + "sha256": "5bb5817e3c59a02859dd3cd2d94f3b00e026a25a70e0f22b1e7fc5234ce636f7", + "size": 968 + }, + "output.json": { + "sha256": "f9a2e86a5906875d01f9ee62d3044eefd3d21d9bfcc71be62d4ddcd2fa75fe9d", + "size": 504 + }, + "series.json": { + "sha256": "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "size": 2 + } + }, "schema": 1, 
"test_case_version": 1 } diff --git a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/regression/.catalog_hash b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/regression/.catalog_hash deleted file mode 100644 index 8f0cfd138..000000000 --- a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/cmip7/regression/.catalog_hash +++ /dev/null @@ -1 +0,0 @@ -8371cadebf4fd40b162e6458fffab6ef3ab3d505 \ No newline at end of file diff --git a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/manifest.json b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/manifest.json index c31213814..3fbef74fa 100644 --- a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/manifest.json +++ b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/manifest.json @@ -1,10 +1,28 @@ { + "catalog_hash": "a69e5b26bcd5050e2c7027ddb6cef0e2ad96db05", "committed": { "diagnostic.json": "5bb5817e3c59a02859dd3cd2d94f3b00e026a25a70e0f22b1e7fc5234ce636f7", "output.json": "f9a2e86a5906875d01f9ee62d3044eefd3d21d9bfcc71be62d4ddcd2fa75fe9d", "series.json": "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945" }, - "native": {}, + "native": { + "annual_mean_global_mean_timeseries.nc": { + "sha256": "acd6202a8fd46d5e1a8d5bef1650b20ab1a93452193aba56a0d05a173cacc62b", + "size": 32253 + }, + "diagnostic.json": { + "sha256": "5bb5817e3c59a02859dd3cd2d94f3b00e026a25a70e0f22b1e7fc5234ce636f7", + "size": 968 + }, + "output.json": { + "sha256": "f9a2e86a5906875d01f9ee62d3044eefd3d21d9bfcc71be62d4ddcd2fa75fe9d", + "size": 504 + }, + "series.json": { + "sha256": "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "size": 2 + } + }, "schema": 1, "test_case_version": 1 } diff --git a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/.catalog_hash 
b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/.catalog_hash deleted file mode 100644 index 14a38d83f..000000000 --- a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/.catalog_hash +++ /dev/null @@ -1 +0,0 @@ -a69e5b26bcd5050e2c7027ddb6cef0e2ad96db05 \ No newline at end of file diff --git a/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/annual_mean_global_mean_timeseries.nc b/packages/climate-ref-example/tests/test-data/global-mean-timeseries/default/regression/annual_mean_global_mean_timeseries.nc deleted file mode 100644 index 33e71c8919aa4c0ba1fb916027671ed5c678f600..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32253 zcmeHP3w%_?xt}C}5F?MGP!;5;Ac-WK-OU3?s*s!{BuWU9C@qTdWcMUFa`xfw1`;X) zYHO>t8+=Put`BVYXUiLvv+1W?Q z#2lxTQ>H)WW{WJ!dB`|12$16y7iG(mVtfvxu5id$Ar25(gJgC*8q^jJ#&?h8OhXRk zhpMa?(rH>9oeBMG>M_a+lvP!UK?y5lt{b5q9?oT{eF{TuBhgBgTNRuzL}eJvQDlgO zWrmHFD#IAAVTj6*lQ7~1$Ej!I_}KyK*)St!8UfR`jmjF8bVL?9mI-^~z?rq%9;lmF zKUa`EWeYXxRKgLcU1~HfXN)y^if}N4gO0GO)I9>E!4U|SP>~NDmCpsT#Yb00V%076 zHJ}JWHww>!`Rmj$W(lX2bvrJaFLMo}TnBNk1G$dATwAYP_I6z}>Jn2_n~tbC=#V)` z%oW|B1i~zno*2zfbd|xuE!)0*RdvhkI#BmIJ!<0K+|_&5AaO6Xe+=3J%pW#>q*z2b z5TnWSNX=7+i6rGf!m)rMOCKLE#^mAGWFZOxmmL{6N{pcqs1>3ZV3)j9B#xsT^Moh@ zJU+D9BU)%Oln8-|Dc;%GUMxz8;;BMVmztV>J56lJ#jiG5uPC|ishQ#s8oD}S6Ixj? 
zxK^wtZk$Wp6(TX_usSh@hW{cVG{EjPmo5|@;&_1&;{j)l|J_1y1%8M*P$Ym{yZH=J zNUfl^M*|L@KHnDC%Q~{ekx1C~$GcXF0_ttGkX$|K+(*tA4-n~oWCzUnR^|7_pNVGl zg|tZ6+;zFQfO0@P90u4qW5=~(Ewyr#Y)A}yZv7f@JQeY<^xc2YPp+qa2{DPs?YcGB z+$f+kgd-N7;4RvRlxKf~9RFdic3f1r#@)p@m&~{ZPMk@j z2OH+8ck4{bO{mb=?R2^lz#VpAIRn?)Bn>@-O?}zqmj1|I$EHy#l4Xr^FInFyFTcGm;0**ky8 z&WBwSaOtw6H;DN(^mP6#-$Se-FKV{ySnEC4kJ)fcCA=zbif@Nm5uj~Wx+VS(t73BOM=IY)$>MFwMb+pDJ zjBA_os8qfj&HVor;(%KJ(=8F#d~wYLmyP2Rc$X#TvV2^Il55tu>@t_Ka48CxJ?Apa zToRX$v)^S2mrk#{=F1~MycI9c%91;v z?5wP;wTd_gbMj~#F%lM=G}^>WF?cwo$GY_Yw@lbKa;f}~Tm^e1-$O&q@sEM&d#n|QO@?h!O z$jKcEOEpJKKApL6gJOom)xoJ9th1AKFb}I1l^lZ6fdQ<~u4-sv2u${+TL&OQ#jr@Xye~Wnz(o#jv|fF=~LeX@)S>&hDBi5TGFswg9nKjYU`3O2j2^sQ`5;l!x6SF-I}q z=(gM$7UXF}N)EO}a&)4RxRtOLo3a+Ff&RLpd6Q_To`b!T%mq(VM{C#!aW*nTC>uMs z@?en+M%Lk+HJ&iSK_eF2Q}*|-Tcg!AH`fZsLy`q^=1uS%J%btdn%u-?X+pe8W|+vr zChtkP;mTT@BTkjOt$An6_0-kZRHuA&auc?B`=X{46|Hmhxv@ypj3q3S8E_OTBcd0Z zF%q@o{DqJ{$jx!3p6MQcVP~+t>^PstSL!LHW-}qffeZ(}#2j$Xf7&}V-8A#u960Ty z!U}{Uu~s$JrD`KnfTn+mr_|%k!O9`CC1OTkkpCFxEnrJz+xnxl53J1*?mOvdIdQ#1r9tnja;kZ_n zNW?5Gkx*Yn&7E-ISJ~DgQ$vTehH%1+g^@iHHf(J+SEc25CKAzjMR9RJRTbcxJds#O zv1%_~T!6(@NBv!PLURg41EHYhvF)(!>4+>YmOUtLF=L^4Q~SwrQ)`dJG_)9sYms)X z3kk>t0>Da*dmHIWM7t7&niUS%T|r6@SP4m=7BkQ4vSKFI`~-}EaFEymO|4-l!jUDm z8SJ1(AgZn_mgvL-D~#qsT-azs&02y?2x4V=1Mx`61Q$c9op_-ZFkn&;v0$mT*fLNK zF|M_hYHjU`*;;8OqoEkBcLe}DRk<)iB-|rvy&{+gL&#jC~63`A@c3b>4X|rY3f*E8Hq=$8bVU>@$&prXu(gK(8yb zS@p9DwWg(oT5}ct>_Y7n{8j5n!(Tq0YR74MlfO`#<8La|^!if#SEu`Y3`m!;l0vN> zKm3T-Nwny}s!M6Wp<1j!qnG+LL5>${IHw{Uo-k{f(dVB+B zN=XF|MQfrb%H!cI3RO+B^u`?c^?AU$+FCJ5P4ow%D#eGk3^Z`WryMx;2gv6vA?~co z%Enx^&Hg#6pwwwoAS+Wjpp#^S)v7^W1F$oyF%o7)eu-DBih{?Q6dA3trmVvBmA03a zwfoDa1~5eJZT>QUiCOCNmzGX1_m@odr8Wx?3Y)tYiuZE6Y2vKlT?Y+ zWI%2l^g7s@syS2c-|Yu2QcfvTGhbhP$u%gLssGC`v!u3@h()237iaR8}btR=73LY1zSj^!CuG zjTx{umbHDV?*8`YW~OvB?*?#q2qr792;`b2_6(#VQ?ru{+*W8GNM5??c6i{0t>X2R zU~{FGw7#h+hX8IO*@j`;Mam4rlnB8}rD1aHPT1n4aic~&k)pS+B4>`%%QT|5PwS+p zBm)yWA)C!VEW0cKj#hw%;bTu!yOeZA%RWiDa*MUoRt7dNuQSeaD3^7mN&0U#BqvUC 
zs8_o)m5EGlj~OotMgnq!CN;H3?hidKrQ_-AwudYdPv6q2+aHCOZ-X(G4pS>R1gR)snW56T|7R16Io+%sUGXNUT-+qbQV4PFp0M z!A-4Hv@=F~1BSZ#hBW^lWf?V*Gmg>}(tA^cQS`c$kc1q;@oV?T6?r@0Ym{N8LJ=b6 ziSPpT8=;<-B80n%MuelJw^E2CO#+0^NX!tQr+2;=B15W@FW-#(!OM+gCW-;MB;CP5s36yvQ& zGk$#%<9Cl?{E$pQgfnPTB5XdEv6AEk;bt75p>RkslGafLXG4V~t*S^KrbSmSoPh%`lZ%wXj<@g}7 zZ6R5{!!$Gj7FU zB?`4Yj6Yk-sGZGt@iN9w&tdFX!T8)t#_IDJ?>L`v+;y)hZ@G$b%xcE-e#H3RHH@cU$N2af#%b3xuDyYA z^i7N_)-t~RW5xwHGd_9?W7(~YH{8ZJ;&#SmcQF3(r;PJ|#`y4EjQ+bB*WAOH_e;i} zUoq~ym$Bv7j1T;V(X*cMn)?}tJ;=EDA;ujKGoJKY#``ufPT9zK)nkl9HZjIGGrsx+ zW8;&I>z-oFe}?h$XBo49&lugp_|jI!6JB8a#Wu!CFEL*7GUM)7813zhFTBP$>vhJT z|AFzyos7vh8UMD6aoPE|%+35|c<^PC0hY}D`(M789P8)8S=VNXKsNo(UvtUOy&wBl z^GEEOp&B044^{L)h5rap+^K=y>m$&y?~QFvxntT^e_ImkG?)bnCgjlL;{Ui)tkpRyo&#le7BipQWEx$Idw4QtUnuW+#Vljy~-t>8I6Yu2P2* zCs*Mk_xy=Zt2@0=oly*P+9Xc8Ppfx$`Ptzri-W~aeA1~-E9OyMNAvmO**9w=#TAqQ zz77N2d()?yc%KTRuQ!#VK7NN0g_*hr7yjUhazXILirJDQPQK6QUx6Xb1 zhx+PNk@C?v9D z^auS-Xv@hu$PZ?D5uR z8^tq}fI4ykflF8I`rMB0*_Cgci)Wdze;nB3j&3H8y=&Y>x|q0z*FCi9k|5IM#5Ld| zeXv~v{m8^i)hmVl>r|%H7v=!3h==9;D_6uTi@5pzD`KuNX7Q5`6&_lz2)%-)c_w5y z@a5yc;S;{IXzzaF18EgQnBhn>n}~gTVt&k5E2!Ul0~B{^P~KYU-cG5+=TAN7%Klcx zD%Eyws+9|FdpFeIs#vAo4EN>g`No4UvQ^>qLUl&9+(&1Bt75d;ML5=kgS*bpS*olG z`F;8i?x|oRc@fNPJP8SW4KIkHSz(x9CzgWHf zr~gz(GKKex15JAUiOv_HKZ$b{bIF89$b++Wx=&=G#U%XC_(aHek?s>ApT*z)a@UY_ z3Y_n|9KS@~+2msVR~`&=JSz@52g9US1{665!<@Kv{`WS?^M%e!BFUoAbGuL3r*l0{ zbq-Q31E1#k{1K0V&nMo$d@twt$fZm@q}v%<>GR1w91EmZVw7(dz8Wmj=_-dRWR}+m zRM7<%^w}Z4a;L|s55jS8fVvETZl=M9pYmKE21?dcN0*s6MR8?`{0tu#`<1UI|* za1%$4LVVL7jl^)59H@v>3H+2doZ$2Rcvq|)pMh%l zjupoS@v(Zl-PO&vtC(Rk*3pBn_&;}Ikvd0zA%EXBL&OvQ9@u@nk{Yc*Gx&C1buj%# zf-Y54+UK^usTE}<72eVX8mFzj?Y;^$&Pt}%Y{&vi;U0BQ<6d=_{2R=Q}VydKFMiH=c+4E zKOGxZpZZH>hZGQ^J|IuAK^CF;O?mq6+DM<@>s9q>^IP3GQZm5&Zb4mi6H*+C2sbvR zP9j%mllkQ2WRJY>#eH|lKsEh)YYOKD1DN;K4J%$x>IXiT)JJYk>c9M5Qun@`)ICop z^=@Du@RSFW`UxA8`UE^b0)HL&JrvJMcO>-!JYTalso(!*QuqHMsc-vzQvc2~N&UnZ zlltK=CG~~7lKSoMB=xIyCiN!7yYP47rlh`QOHzLec|UqQsh2#J)Hgn#)Wt87`fGP4 
z^#-IJ{BTm=iF_MD*J)_$PNa=`G^r0m+O`czeHZe-j&dVW&)aC@-dB>k0eU_~Ts)H0 zXQ6x#=sxMbq+W@9b3oHQ+miZ&_&xL0q<;1LN&QXKu?qcJ2bwyG#`l1rI}7oZXzOce z^AePK|F=o~4)l5D3rW2keH{5lQf~o%iaMVLJzLRsBmO>kEvauuzQL$>GycYb_C?5B zgK{UK&YEYF`m<<%JNh#U@!x*nFdp?TN84|rzT5EpbnwUzdMCb{ z)OTZS7NTzfwA+BXUs(_OP;MG{|IAxSeG}qap1?TaS?3)XqsNl^@u>3%@a1ES(|Y7x z4jMOMykbu#bqn=ejrb(c_hZ!Y^oL1(DcY|B?VC{M9MCo#bv+AuA42@HKY=&kQ^5yG z{o;?4`c0s56!Jfe^6&m7sSi4|SD!YvS07*8tKU@KtN*y5SHEaluRgJ{S0AbO>aWh~ z)emaw)rZ&h>hF~F>YEUM7tcOH+zUJu&!VUG>X$6&)mQy_>w{xz+GpPWm`99V(LOU1 z&+sE751ILp(FYlQm{|`p>tV(o$k+q>Vh^10yQOb$-MwSI!Wo%(h94Pu$jpa~K9G7K XqYpFdLuP%<*asQ=i1olf)Q5iuGz2y1 From e7ca61e992326dd7803987d71095e39b1de5914f Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Wed, 17 Jun 2026 14:39:58 +1000 Subject: [PATCH 3/3] docs(changelog): add fragment for the R2 native baseline backend --- changelog/732.feature.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelog/732.feature.md diff --git a/changelog/732.feature.md b/changelog/732.feature.md new file mode 100644 index 000000000..007d29e46 --- /dev/null +++ b/changelog/732.feature.md @@ -0,0 +1,5 @@ +Added a Cloudflare R2 object-store backend for diagnostic regression baselines, so `ref test-cases mint` can publish a test case's native outputs to the shared baseline store rather than only a local directory. +The store endpoint and bucket are configured through the `REF_NATIVE_STORE_*` settings (defaulting to the project's public baseline bucket), and write credentials are supplied via the environment or a named AWS/R2 profile, never the persisted configuration. + +Minting is also easier to get right. 
+`ref test-cases mint` now checks the store credentials and bucket up front and stops with a clear message when they are misconfigured; the new `ref test-cases check-store` command verifies store connectivity without minting; `ref test-cases mint --dry-run` previews what would be minted; and `ref test-cases fetch` restores a missing local paths file on its own, so a fresh checkout no longer needs `--force`.