From 60557251baeaf885f57e188819e1e938d6311ce0 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Sat, 16 May 2026 16:05:37 -0700 Subject: [PATCH 01/10] Switch query service to new attrs API --- src/azul/service/query_service.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index 83c6f63af4..e5d86f4390 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -20,7 +20,7 @@ Self, ) -import attr +import attrs from furl import ( furl, ) @@ -120,7 +120,7 @@ def process_response(self, response: R1) -> R2: raise NotImplementedError -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class OpenSearchChain[R0, R1, R2](OpenSearchStage[R0, R2]): """ The result of wrapping a stage or chain in another stage. @@ -151,7 +151,7 @@ def stages(self) -> Iterable[OpenSearchStage]: yield self.inner -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class _OpenSearchStage[R1, R2](OpenSearchStage[R1, R2], metaclass=ABCMeta): """ A base implementation of a stage. @@ -171,7 +171,7 @@ def wrap[R0](self, other: OpenSearchStage[R0, R1]) -> OpenSearchChain[R0, R1, R2 TranslatedFilters = Mapping[FieldPath, Mapping[str, Sequence[PrimitiveJSON]]] -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class FilterStage(_OpenSearchStage[Response, Response]): """ Converts the given filters to an OpenSearch query and adds that query as @@ -269,7 +269,7 @@ def prepare_query(self, skip_field_paths: tuple[FieldPath] = ()) -> Query: return Q('bool', must=query_list) -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class AggregationStage(_OpenSearchStage[MutableJSON, MutableJSON]): """ Cooperate with the given filter stage to augment the request with an @@ -422,7 +422,7 @@ def _populate_accessible(self, aggs: MutableJSON) -> None: aggs[special_fields.accessible.name] = agg -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class SlicingStage(_OpenSearchStage[Response, Response]): """ Augments the request with a document slice (known as a *source filter* in @@ -451,7 +451,7 @@ def _prepared_slice(self) -> DocumentSlice | None: # FIXME: Elminate Eliminate reliance on Elasticsearch DSL # https://github.com/DataBiosphere/azul/issues/4111 -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class ToDictStage(_OpenSearchStage[Response, MutableJSON]): def prepare_request(self, request: Search) -> Search: @@ -473,7 +473,7 @@ def sort_key_to_json(s: SortKey) -> AnyJSON: return list(s) -@attr.s(auto_attribs=True, kw_only=True, frozen=True) +@attrs.frozen(auto_attribs=True, frozen=True) class Pagination: order: str size: int @@ -486,9 +486,9 @@ def advance(self, search_before: SortKey | None, search_after: SortKey | None ) -> Self: - return attr.evolve(self, - search_before=search_before, - search_after=search_after) + return attrs.evolve(self, + search_before=search_before, + search_after=search_after) def link(self, *, previous: bool, **params: str) -> furl | None: """ @@ -517,7 +517,7 @@ class ResponsePagination(JSONTypedDict): ResponseTriple = tuple[JSONs, ResponsePagination, JSON] -@attr.s(frozen=True, auto_attribs=True, kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class PaginationStage(_OpenSearchStage[JSON, ResponseTriple]): """ Handles the pagination of search results From c1c72054e374dea5e85fcefc80cde8d24156d8df Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 18 May 2026 17:29:07 -0700 Subject: [PATCH 02/10] fixup! Switch query service to new attrs API --- src/azul/service/query_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index e5d86f4390..70cee20df0 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -473,7 +473,7 @@ def sort_key_to_json(s: SortKey) -> AnyJSON: return list(s) -@attrs.frozen(auto_attribs=True, frozen=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class Pagination: order: str size: int From f068dafbe9999bcf0baf880a7afcbdf1cc56c8c3 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Sun, 17 May 2026 14:25:51 -0700 Subject: [PATCH 03/10] Store file URL function in IndexService --- src/azul/service/drs_controller.py | 2 +- src/azul/service/index_controller.py | 3 +-- src/azul/service/index_service.py | 19 ++++++++----------- src/azul/service/repository_controller.py | 2 +- test/service/test_response.py | 3 +-- 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/azul/service/drs_controller.py b/src/azul/service/drs_controller.py index 8ff29c4742..5abe14e0ea 100644 --- a/src/azul/service/drs_controller.py +++ b/src/azul/service/drs_controller.py @@ -67,7 +67,7 @@ class DRSController(ServiceController): @cached_property def _service(self) -> IndexService: - return IndexService() + return IndexService(file_url_func=self._file_url) _drs_spec_description = fd(''' This is a partial implementation of the [DRS 1.0.0 spec][1]. Not all diff --git a/src/azul/service/index_controller.py b/src/azul/service/index_controller.py index ed07365a18..dfe2e69805 100644 --- a/src/azul/service/index_controller.py +++ b/src/azul/service/index_controller.py @@ -70,7 +70,7 @@ class IndexController(QueryController): @cached_property def _service(self) -> IndexService: - return IndexService() + return IndexService(file_url_func=self._file_url) _min_page_size = 1 @@ -365,7 +365,6 @@ def search(self, entity_type: str, entity_id: str | None = None) -> str | JSON: try: response = self._service.search(catalog=self.app.catalog, entity_type=entity_type, - file_url_func=self._file_url, item_id=entity_id, filters=filters, pagination=pagination) diff --git a/src/azul/service/index_service.py b/src/azul/service/index_service.py index d37345b025..1cdb46c27c 100644 --- a/src/azul/service/index_service.py +++ b/src/azul/service/index_service.py @@ -80,7 +80,6 @@ def __init__(self, entity_type: str, entity_id: str): class SearchResponseStage(_OpenSearchStage[ResponseTriple, MutableJSON], metaclass=ABCMeta): service: IndexService - file_url_func: FileUrlFunc def prepare_request(self, request: Search) -> Search: return request @@ -90,10 +89,10 @@ def _file_url(self, *, uuid: str, version: str, drs_uri: str | None) -> str | No # To download a file we need its DRS URI return None else: - return str(self.file_url_func(catalog=self.catalog, - fetch=False, - file_uuid=uuid, - version=version)) + return str(self.service.file_url_func(catalog=self.catalog, + fetch=False, + file_uuid=uuid, + version=version)) def _file_mirror_uri(self, source: SourceRef, file: JSON) -> str | None: file_cls = self.plugin.file_class @@ -113,7 +112,9 @@ def prepare_request(self, request: Search) -> Search: return request +@attrs.frozen(kw_only=True) class IndexService(QueryService): + file_url_func: FileUrlFunc @cache def mirror_service(self, catalog: CatalogName) -> MirrorService: @@ -123,7 +124,6 @@ def search(self, *, catalog: CatalogName, entity_type: str, - file_url_func: FileUrlFunc, item_id: str | None, filters: Filters, pagination: Pagination @@ -148,8 +148,7 @@ def search(self, filters=filters, pagination=pagination, aggregate=item_id is None, - entity_type=entity_type, - file_url_func=file_url_func) + entity_type=entity_type) special_fields = self.metadata_plugin(catalog).special_fields for hit in response['hits']: @@ -169,7 +168,6 @@ def _search(self, aggregate: bool, filters: Filters, pagination: Pagination, - file_url_func: FileUrlFunc ) -> MutableJSON: """ This function does the whole transformation process. It takes the path @@ -225,8 +223,7 @@ def _search(self, response_stage_cls = plugin.search_response_stage chain = response_stage_cls(service=self, catalog=catalog, - entity_type=entity_type, - file_url_func=file_url_func).wrap(chain) + entity_type=entity_type).wrap(chain) request = self.create_request(catalog, entity_type) request = chain.prepare_request(request) diff --git a/src/azul/service/repository_controller.py b/src/azul/service/repository_controller.py index 65fb800271..1582e1f3db 100644 --- a/src/azul/service/repository_controller.py +++ b/src/azul/service/repository_controller.py @@ -99,7 +99,7 @@ def _repository_service(self) -> RepositoryService: @cached_property def _index_service(self) -> IndexService: - return IndexService() + return IndexService(file_url_func=self._file_url) def _mirror_service(self, catalog: CatalogName) -> MirrorService: return self._index_service.mirror_service(catalog) diff --git a/test/service/test_response.py b/test/service/test_response.py index 0486d313c9..e0d5774272 100644 --- a/test/service/test_response.py +++ b/test/service/test_response.py @@ -208,8 +208,7 @@ def _service_index_service(self) -> IndexService: def _response_stage(self, entity_type: str) -> HCASearchResponseStage: return HCASearchResponseStage(service=self._service_index_service, entity_type=entity_type, - catalog=self.catalog, - file_url_func=self.file_url_func) + catalog=self.catalog) @property def paginations(self): From 6fd4a5df7bb4e27992005929d8af37a0abd62ce8 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 18 May 2026 17:31:01 -0700 Subject: [PATCH 04/10] fixup! Store file URL function in IndexService --- src/azul/service/index_service.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/azul/service/index_service.py b/src/azul/service/index_service.py index 1cdb46c27c..4b912e55a1 100644 --- a/src/azul/service/index_service.py +++ b/src/azul/service/index_service.py @@ -112,7 +112,7 @@ def prepare_request(self, request: Search) -> Search: return request -@attrs.frozen(kw_only=True) +@attrs.frozen(auto_attribs=True, kw_only=True) class IndexService(QueryService): file_url_func: FileUrlFunc @@ -135,9 +135,6 @@ def search(self, :param pagination: A dictionary with pagination information as return from `_get_pagination()` :param filters: parsed JSON filters from the request :param item_id: If item_id is specified, only a single item is searched for - :param file_url_func: A function that is used only when getting a *list* of files data. - It creates the files URL based on info from the request. It should have the type - signature `(uuid: str, **params) -> str` :return: The OpenSearch JSON response """ if item_id is not None: From ed4728556318444801d5d7bef8fa704b8aadc6c0 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Sun, 17 May 2026 15:43:18 -0700 Subject: [PATCH 05/10] Move mirror service from manifest generator to manifest service --- src/azul/service/index_service.py | 16 ++-------------- src/azul/service/manifest_service.py | 10 ++++------ src/azul/service/query_service.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/azul/service/index_service.py b/src/azul/service/index_service.py index 4b912e55a1..fb3e805de0 100644 --- a/src/azul/service/index_service.py +++ b/src/azul/service/index_service.py @@ -32,12 +32,6 @@ from azul.filters import ( Filters, ) -from azul.indexer.mirror_service import ( - MirrorService, -) -from azul.lib import ( - cache, -) from azul.lib.types import ( JSON, MutableJSON, @@ -51,14 +45,13 @@ ) from azul.service import ( BadArgumentException, - FileUrlFunc, ) from azul.service.query_service import ( + FileUrlService, IndexNotFoundError, OpenSearchStage, Pagination, PaginationStage, - QueryService, ResponseTriple, ToDictStage, _OpenSearchStage, @@ -113,12 +106,7 @@ def prepare_request(self, request: Search) -> Search: @attrs.frozen(auto_attribs=True, kw_only=True) -class IndexService(QueryService): - file_url_func: FileUrlFunc - - @cache - def mirror_service(self, catalog: CatalogName) -> MirrorService: - return MirrorService(catalog=catalog) +class IndexService(FileUrlService): def search(self, *, diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 4a02f52b57..099aa1d548 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -166,17 +166,16 @@ manifest_config_to_json, ) from azul.service import ( - FileUrlFunc, avro_pfb, ) from azul.service.avro_pfb import ( PFBRelation, ) from azul.service.query_service import ( + FileUrlService, OpenSearchChain, Pagination, PaginationStage, - QueryService, SortKey, ToDictStage, sort_key_from_json, @@ -573,8 +572,7 @@ class CachedManifestNotFound(Exception): @attrs.frozen(kw_only=True) -class ManifestService(QueryService): - file_url_func: FileUrlFunc +class ManifestService(FileUrlService): @cached_property def storage_service(self) -> StorageService: @@ -810,9 +808,9 @@ def format(cls) -> ManifestFormat: def metadata_plugin(self) -> MetadataPlugin: return self.service.metadata_plugin(self.catalog) - @cached_property + @property def mirror_service(self) -> MirrorService: - return MirrorService(catalog=self.catalog) + return self.service.mirror_service(self.catalog) @classmethod @abstractmethod diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index 70cee20df0..0e48d22255 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -62,8 +62,12 @@ from azul.indexer.document_service import ( DocumentService, ) +from azul.indexer.mirror_service import ( + MirrorService, +) from azul.lib import ( R, + cache, cached_property, ) from azul.lib.types import ( @@ -85,6 +89,9 @@ MetadataPlugin, dotted, ) +from azul.service import ( + FileUrlFunc, +) log = logging.getLogger(__name__) @@ -709,3 +716,12 @@ def create_request(self, index=str(IndexName.create(catalog=catalog, qualifier=entity_type, doc_type=doc_type))) + + +@attrs.frozen(kw_only=True) +class FileUrlService(QueryService): + file_url_func: FileUrlFunc + + @cache + def mirror_service(self, catalog: CatalogName) -> MirrorService: + return MirrorService(catalog=catalog) From ab229e74318bef9e884ae781332c403fa9461e21 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Sun, 17 May 2026 14:48:36 -0700 Subject: [PATCH 06/10] Refactor SearchResponseStage._file_url --- src/azul/plugins/metadata/anvil/service/response.py | 5 +---- src/azul/plugins/metadata/hca/service/response.py | 4 +--- src/azul/service/index_service.py | 9 +++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/service/response.py b/src/azul/plugins/metadata/anvil/service/response.py index bdd48bfbb0..3d1fccdbba 100644 --- a/src/azul/plugins/metadata/anvil/service/response.py +++ b/src/azul/plugins/metadata/anvil/service/response.py @@ -32,7 +32,6 @@ json_sequence_of_mappings, json_str, json_untyped_dict, - optional, ) from azul.plugins import ( SpecialFields, @@ -227,9 +226,7 @@ def _pivotal_entity(self, ) -> MutableJSON: inner_entity = copy_json(inner_entity) if inner_entity_type == 'files': - inner_entity['azul_url'] = self._file_url(uuid=json_str(inner_entity['document_id']), - version=json_str(inner_entity['version']), - drs_uri=optional(json_str, inner_entity['drs_uri'])) + inner_entity['azul_url'] = self._file_url(inner_entity) inner_entity['azul_mirror_uri'] = self._file_mirror_uri(source, inner_entity) inner_entity.pop('version', None) return inner_entity diff --git a/src/azul/plugins/metadata/hca/service/response.py b/src/azul/plugins/metadata/hca/service/response.py index 4b27eecebd..c1b99d7cc9 100644 --- a/src/azul/plugins/metadata/hca/service/response.py +++ b/src/azul/plugins/metadata/hca/service/response.py @@ -419,9 +419,7 @@ def make_file(self, source: SourceRef, file: JSON) -> JSON: 'version': file.get('version'), 'matrixCellCount': file.get('matrix_cell_count'), 'drs_uri': file.get('drs_uri'), - 'azul_url': self._file_url(uuid=json_str(file['uuid']), - version=json_str(file['version']), - drs_uri=optional(json_str, file['drs_uri'])), + 'azul_url': self._file_url(file), 'azul_mirror_uri': self._file_mirror_uri(source, file), } return translated_file diff --git a/src/azul/service/index_service.py b/src/azul/service/index_service.py index fb3e805de0..f08e43db4f 100644 --- a/src/azul/service/index_service.py +++ b/src/azul/service/index_service.py @@ -77,15 +77,16 @@ class SearchResponseStage(_OpenSearchStage[ResponseTriple, MutableJSON], def prepare_request(self, request: Search) -> Search: return request - def _file_url(self, *, uuid: str, version: str, drs_uri: str | None) -> str | None: - if drs_uri is None: + def _file_url(self, file: JSON) -> str | None: + if file['drs_uri'] is None: # To download a file we need its DRS URI return None else: + special_fields = self.plugin.special_fields return str(self.service.file_url_func(catalog=self.catalog, fetch=False, - file_uuid=uuid, - version=version)) + file_uuid=file[special_fields.file_uuid.name_in_hit], + version=file['version'])) def _file_mirror_uri(self, source: SourceRef, file: JSON) -> str | None: file_cls = self.plugin.file_class From 92618414b72a730dcaed3977032d9e7ac32881be Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Sun, 17 May 2026 15:17:09 -0700 Subject: [PATCH 07/10] Consolidate duplicate methods --- src/azul/service/index_service.py | 14 ++---------- src/azul/service/manifest_service.py | 15 ++----------- src/azul/service/query_service.py | 32 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/azul/service/index_service.py b/src/azul/service/index_service.py index f08e43db4f..1361d2c42c 100644 --- a/src/azul/service/index_service.py +++ b/src/azul/service/index_service.py @@ -78,20 +78,10 @@ def prepare_request(self, request: Search) -> Search: return request def _file_url(self, file: JSON) -> str | None: - if file['drs_uri'] is None: - # To download a file we need its DRS URI - return None - else: - special_fields = self.plugin.special_fields - return str(self.service.file_url_func(catalog=self.catalog, - fetch=False, - file_uuid=file[special_fields.file_uuid.name_in_hit], - version=file['version'])) + return self.service.azul_file_url(self.catalog, file) def _file_mirror_uri(self, source: SourceRef, file: JSON) -> str | None: - file_cls = self.plugin.file_class - mirror_service = self.service.mirror_service(self.catalog) - return mirror_service.mirror_uri(source, file_cls, file) + return self.service.azul_mirror_uri(self.catalog, source, file) class SummaryResponseStage(OpenSearchStage[JSON, MutableJSON], diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 099aa1d548..ad9891c722 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -962,7 +962,6 @@ def __init__(self, self.service = service self.catalog = catalog self.filters = filters - self.file_url_func = service.file_url_func manifest_namespace = UUID('ca1df635-b42c-4671-9322-b0a7209f0235') @@ -1140,20 +1139,10 @@ def _azul_file_url(self, file: JSON, args: Mapping = frozendict() ) -> str | None: - if file['drs_uri'] is None: - # To download a file we need its DRS URI - return None - else: - special_fields = self.metadata_plugin.special_fields - return str(self.file_url_func(catalog=self.catalog, - file_uuid=json_str(file[special_fields.file_uuid.name_in_hit]), - version=json_str(file['version']), - fetch=False, - **args)) + return self.service.azul_file_url(self.catalog, file, args) def _azul_mirror_uri(self, source: SourceRef, file: JSON) -> str | None: - file_cls = self.metadata_plugin.file_class - return self.mirror_service.mirror_uri(source, file_cls, file) + return self.service.azul_mirror_uri(self.catalog, source, file) @cache def _content_hash(self, *, by_bundle: bool) -> str: diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index 0e48d22255..a202e7c165 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -79,6 +79,7 @@ PrimitiveJSON, json_list, json_str, + optional, ) from azul.opensearch import ( OpenSearchClientFactory, @@ -92,6 +93,12 @@ from azul.service import ( FileUrlFunc, ) +from azul.source import ( + SourceRef, +) +from azul.vendored.frozendict import ( + frozendict, +) log = logging.getLogger(__name__) @@ -725,3 +732,28 @@ class FileUrlService(QueryService): @cache def mirror_service(self, catalog: CatalogName) -> MirrorService: return MirrorService(catalog=catalog) + + def azul_mirror_uri(self, + catalog: CatalogName, + source: SourceRef, + file: JSON + ) -> str | None: + file_cls = self.metadata_plugin(catalog).file_class + return self.mirror_service(catalog).mirror_uri(source, file_cls, file) + + def azul_file_url(self, + catalog: CatalogName, + file: JSON, + args: Mapping = frozendict() + ) -> str | None: + drs_uri = optional(json_str, file['drs_uri']) + if drs_uri is None: + # To download a file we need its DRS URI + return None + else: + special_fields = self.metadata_plugin(catalog).special_fields + return str(self.file_url_func(catalog=catalog, + file_uuid=json_str(file[special_fields.file_uuid.name_in_hit]), + version=json_str(file['version']), + fetch=False, + **args)) From 275c9bc5d2bd43eb0c01d8d5020f76e2d2cbe38b Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 18 May 2026 18:49:36 -0700 Subject: [PATCH 08/10] Extract test superclass --- test/service/test_response.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/service/test_response.py b/test/service/test_response.py index e0d5774272..ba472c3e3f 100644 --- a/test/service/test_response.py +++ b/test/service/test_response.py @@ -3749,7 +3749,7 @@ def test(self): }, response.json()) -class TestResponseWithDCP2Cans(DCP2CannedBundleTestCase, WebServiceTestCase): +class DCP2ResponseTestCase(DCP2CannedBundleTestCase, WebServiceTestCase): @classmethod def setUpClass(cls): @@ -3770,6 +3770,15 @@ def bundles(cls) -> list[SourcedBundleFQID]: version='2022-08-23T17:25:02.565000Z') ] + def get_file(self, entry_id: str) -> JSON: + url = self.base_url.set(path=('index', 'files', entry_id)) + response = requests.get(str(url)) + response.raise_for_status() + return one(response.json()['files']) + + +class TestResponseWithDCP2Cans(DCP2ResponseTestCase): + def test_tdr_sources(self): url = self.base_url.set(path='/index/projects') response = requests.get(str(url)) @@ -3787,12 +3796,6 @@ def test_tdr_sources(self): prefix=Prefix.parse(source[prefix_field])) self.assertEqual(self.source.ref, source) - def get_file(self, entry_id: str) -> JSON: - url = self.base_url.set(path=('index', 'files', entry_id)) - response = requests.get(str(url)) - response.raise_for_status() - return one(response.json()['files']) - def test_file_urls(self): with self.subTest(phantom=False): file = self.get_file('507d2814-1688-54e7-b73e-2f831aa34368') From 4d9d932b7c68d27c517fb458f06be8d66463683a Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Fri, 15 May 2026 13:32:51 -0700 Subject: [PATCH 09/10] Fix: curl manifest futilely includes external LungMAP files (#8027) --- src/azul/service/query_service.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index a202e7c165..96d277a986 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -750,6 +750,16 @@ def azul_file_url(self, if drs_uri is None: # To download a file we need its DRS URI return None + elif ( + config.catalogs[catalog].atlas == 'lungmap' + and drs_uri.startswith('drs://dg.4503:') + ): + # Lungmap contains external files hosted on BioDataCatalyst. + # Downloading these files requires authentication that can't be + # provided by Azul, rendering our file URLs non-functional. If a + # user tries to follow such a URL, the request fails with a 401 + # status, so we avoid exposing them wherever possible. + return None else: special_fields = self.metadata_plugin(catalog).special_fields return str(self.file_url_func(catalog=catalog, From 40836f1235d52736e8bf8a6e9f373c49da54b733 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 18 May 2026 17:33:36 -0700 Subject: [PATCH 10/10] fixup! Fix: curl manifest futilely includes external LungMAP files (#8027) --- src/azul/service/query_service.py | 2 +- test/service/test_response.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/azul/service/query_service.py b/src/azul/service/query_service.py index 96d277a986..d2aab0c2d6 100644 --- a/src/azul/service/query_service.py +++ b/src/azul/service/query_service.py @@ -754,7 +754,7 @@ def azul_file_url(self, config.catalogs[catalog].atlas == 'lungmap' and drs_uri.startswith('drs://dg.4503:') ): - # Lungmap contains external files hosted on BioDataCatalyst. + # LungMAP contains external files hosted on BioDataCatalyst. # Downloading these files requires authentication that can't be # provided by Azul, rendering our file URLs non-functional. If a # user tries to follow such a URL, the request fails with a 401 diff --git a/test/service/test_response.py b/test/service/test_response.py index ba472c3e3f..b129149928 100644 --- a/test/service/test_response.py +++ b/test/service/test_response.py @@ -39,6 +39,8 @@ LocalAppTestCase, ) from azul import ( + CatalogName, + Config, config, ) from azul.deployment import ( @@ -48,11 +50,13 @@ null_str, ) from azul.indexer import ( + Bundle, BundleFQID, SourcedBundleFQID, ) from azul.indexer.document import ( DocumentType, + EntityReference, IndexName, ) import azul.indexer.index_service @@ -3858,3 +3862,30 @@ def test_contributed_analyses_matrix(self): }} } self.assertEqual(expected_tree, project['contributedAnalyses']) + + +class TestExternalLungmapFiles(DCP2ResponseTestCase): + + @classmethod + def catalog_config(cls) -> dict[CatalogName, Config.Catalog]: + return { + name: attr.evolve(catalog, atlas='lungmap') + for name, catalog in super().catalog_config().items() + } + + external_file_uuid = '27fc1a2e-d70e-47ee-a4b7-92bf57e5b7a6' + # Compact identifier for BioDataCatalyst + external_drs_uri = 'drs://dg.4503:foo' + + @classmethod + def _index_bundle(cls, bundle: Bundle, *, delete: bool = False) -> None: + assert isinstance(bundle, HCABundle), bundle + for ref, entry in bundle.manifest.items(): + if EntityReference.parse(ref).entity_id == cls.external_file_uuid: + entry['drs_uri'] = cls.external_drs_uri + super()._index_bundle(bundle, delete=delete) + + def test_external_files(self): + file = self.get_file(self.external_file_uuid) + self.assertEqual(self.external_drs_uri, file['drs_uri']) + self.assertIsNone(file['azul_url'])