Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
json_sequence_of_mappings,
json_str,
json_untyped_dict,
optional,
)
from azul.plugins import (
SpecialFields,
Expand Down Expand Up @@ -227,10 +226,9 @@ def _pivotal_entity(self,
) -> MutableJSON:
inner_entity = copy_json(inner_entity)
if inner_entity_type == 'files':
inner_entity['azul_url'] = self._file_url(uuid=json_str(inner_entity['document_id']),
version=json_str(inner_entity['version']),
drs_uri=optional(json_str, inner_entity['drs_uri']))
inner_entity['azul_mirror_uri'] = self._file_mirror_uri(source, inner_entity)
inner_entity['azul_url'] = self.service.azul_file_url(inner_entity)
inner_entity['azul_mirror_uri'] = self.service.azul_mirror_uri(source,
inner_entity)
inner_entity.pop('version', None)
return inner_entity

Expand Down
6 changes: 2 additions & 4 deletions src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,10 +419,8 @@ def make_file(self, source: SourceRef, file: JSON) -> JSON:
'version': file.get('version'),
'matrixCellCount': file.get('matrix_cell_count'),
'drs_uri': file.get('drs_uri'),
'azul_url': self._file_url(uuid=json_str(file['uuid']),
version=json_str(file['version']),
drs_uri=optional(json_str, file['drs_uri'])),
'azul_mirror_uri': self._file_mirror_uri(source, file),
'azul_url': self.service.azul_file_url(file),
'azul_mirror_uri': self.service.azul_mirror_uri(source, file),
}
return translated_file

Expand Down
2 changes: 1 addition & 1 deletion src/azul/service/drs_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class DRSController(ServiceController):

@cached_property
def _service(self) -> IndexService:
return IndexService()
return IndexService(file_url_func=self._file_url)

_drs_spec_description = fd('''
This is a partial implementation of the [DRS 1.0.0 spec][1]. Not all
Expand Down
3 changes: 1 addition & 2 deletions src/azul/service/index_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class IndexController(QueryController):

@cached_property
def _service(self) -> IndexService:
return IndexService()
return IndexService(file_url_func=self._file_url)

_min_page_size = 1

Expand Down Expand Up @@ -370,7 +370,6 @@ def search(self, entity_type: str, entity_id: str | None = None) -> str | JSON:
try:
response = self._service.search(catalog=self.app.catalog,
entity_type=entity_type,
file_url_func=self._file_url,
item_id=entity_id,
filters=filters,
pagination=pagination)
Expand Down
59 changes: 37 additions & 22 deletions src/azul/service/index_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
from azul.lib.types import (
JSON,
MutableJSON,
json_str,
optional,
)
from azul.lib.uuids import (
validate_uuid,
Expand All @@ -66,6 +68,9 @@
from azul.source import (
SourceRef,
)
from azul.vendored.frozendict import (
frozendict,
)

log = logging.getLogger(__name__)

Expand All @@ -80,26 +85,10 @@ def __init__(self, entity_type: str, entity_id: str):
class SearchResponseStage(_OpenSearchStage[ResponseTriple, MutableJSON],
metaclass=ABCMeta):
service: IndexService
file_url_func: FileUrlFunc

# Pass-through hook: this stage does not modify the search request and hands
# back the very object it was given.
def prepare_request(self, request: Search) -> Search:
return request

# Build the URL for a file, or return None when the file has no DRS URI.
#
# All arguments are keyword-only. `uuid` and `version` are forwarded to
# `self.file_url_func` as `file_uuid` and `version`, together with this
# stage's catalog and `fetch=False`; the result is converted with `str()`.
def _file_url(self, *, uuid: str, version: str, drs_uri: str | None) -> str | None:
if drs_uri is None:
# To download a file we need its DRS URI
return None
else:
# `drs_uri` only decides whether a URL is produced at all; it is not
# itself passed on to `file_url_func`.
return str(self.file_url_func(catalog=self.catalog,
fetch=False,
file_uuid=uuid,
version=version))

# Return the mirror URI of `file` from `source`, as computed by the mirror
# service (annotated as `str | None`).
#
# The file class comes from `self.plugin`, and the mirror service is obtained
# from `self.service` for this stage's catalog.
def _file_mirror_uri(self, source: SourceRef, file: JSON) -> str | None:
file_cls = self.plugin.file_class
mirror_service = self.service.mirror_service(self.catalog)
return mirror_service.mirror_uri(source, file_cls, file)


class SummaryResponseStage(OpenSearchStage[JSON, MutableJSON],
metaclass=ABCMeta):
Expand All @@ -113,7 +102,9 @@ def prepare_request(self, request: Search) -> Search:
return request


@attrs.frozen(kw_only=True)
class IndexService(QueryService):
file_url_func: FileUrlFunc

@cache
def mirror_service(self, catalog: CatalogName) -> MirrorService:
Expand All @@ -123,7 +114,6 @@ def search(self,
*,
catalog: CatalogName,
entity_type: str,
file_url_func: FileUrlFunc,
item_id: str | None,
filters: Filters,
pagination: Pagination
Expand All @@ -148,8 +138,7 @@ def search(self,
filters=filters,
pagination=pagination,
aggregate=item_id is None,
entity_type=entity_type,
file_url_func=file_url_func)
entity_type=entity_type)

special_fields = self.metadata_plugin(catalog).special_fields
for hit in response['hits']:
Expand All @@ -169,7 +158,6 @@ def _search(self,
aggregate: bool,
filters: Filters,
pagination: Pagination,
file_url_func: FileUrlFunc
) -> MutableJSON:
"""
This function does the whole transformation process. It takes the path
Expand Down Expand Up @@ -225,8 +213,7 @@ def _search(self,
response_stage_cls = plugin.search_response_stage
chain = response_stage_cls(service=self,
catalog=catalog,
entity_type=entity_type,
file_url_func=file_url_func).wrap(chain)
entity_type=entity_type).wrap(chain)

request = self.create_request(catalog, entity_type)
request = chain.prepare_request(request)
Expand Down Expand Up @@ -366,3 +353,31 @@ def _hit_to_doc(hit: Hit) -> JSON:
# Read-only property that unconditionally reports False.
# NOTE(review): presumably overrides a default declared on a base class that
# is not visible here -- confirm against the parent class.
@property
def always_limit_access(self) -> bool:
return False

# Return the mirror URI of `file` from `source`, as computed by the mirror
# service's `mirror_uri` (annotated as `str | None`). The file class passed
# along is taken from the metadata plugin.
#
# NOTE(review): `mirror_service` is declared on this class as a cached method
# that takes a `catalog` argument, and `search` obtains the plugin by calling
# `self.metadata_plugin(catalog)`. Here both are used as plain attributes
# (`self.mirror_service.mirror_uri`, `self.metadata_plugin.file_class`), and no
# catalog is supplied. Confirm that these resolve on every class this method
# is invoked on, or pass the catalog in explicitly.
def azul_mirror_uri(self, source: SourceRef, file: JSON) -> str | None:
file_cls = self.metadata_plugin.file_class
return self.mirror_service.mirror_uri(source, file_cls, file)

# Return the Azul URL of `file` as a string, or None if no URL should be
# offered for it.
#
# `file` must contain the keys 'drs_uri' and 'version', plus the key named by
# the metadata plugin's `special_fields.file_uuid.name_in_hit`. `args` is
# expanded into additional keyword arguments for `self.file_url_func`.
#
# None is returned in two cases: the file has no DRS URI, or the catalog's
# atlas is 'lungmap' and the DRS URI starts with 'drs://dg.4503:'.
#
# NOTE(review): this method reads `self.catalog` and accesses
# `self.metadata_plugin.special_fields` as an attribute, whereas the other
# methods of this class visible in this change take `catalog` as an argument
# and call `self.metadata_plugin(catalog)`. Confirm that `catalog` and an
# attribute-style `metadata_plugin` exist on every class this runs on.
# NOTE(review): the default for `args` is a shared `frozendict()` instance;
# presumably immutable, so sharing it between calls is harmless -- verify.
def azul_file_url(self,
file: JSON,
args: Mapping = frozendict()
) -> str | None:
drs_uri = optional(json_str, file['drs_uri'])
if drs_uri is None:
# To download a file we need its DRS URI
return None
elif (
config.catalogs[self.catalog].atlas == 'lungmap'
and drs_uri.startswith('drs://dg.4503:')
):
# Lungmap contains external files hosted on BioDataCatalyst.
# Downloading these files requires authentication that can't be
# provided by Azul, and our file URL fails with a 401.
return None
else:
# `fetch=False` is always passed; entries in `args` are forwarded last as
# extra keyword arguments.
special_fields = self.metadata_plugin.special_fields
return str(self.file_url_func(catalog=self.catalog,
file_uuid=json_str(file[special_fields.file_uuid.name_in_hit]),
version=json_str(file['version']),
fetch=False,
**args))
41 changes: 9 additions & 32 deletions src/azul/service/manifest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,18 @@
manifest_config_to_json,
)
from azul.service import (
FileUrlFunc,
avro_pfb,
)
from azul.service.avro_pfb import (
PFBRelation,
)
from azul.service.index_service import (
IndexService,
)
from azul.service.query_service import (
OpenSearchChain,
Pagination,
PaginationStage,
QueryService,
SortKey,
ToDictStage,
sort_key_from_json,
Expand All @@ -190,9 +191,6 @@
Prefix,
SourceRef,
)
from azul.vendored.frozendict import (
frozendict,
)

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -573,8 +571,7 @@ class CachedManifestNotFound(Exception):


@attrs.frozen(kw_only=True)
class ManifestService(QueryService):
file_url_func: FileUrlFunc
class ManifestService(IndexService):

@cached_property
def storage_service(self) -> StorageService:
Expand Down Expand Up @@ -964,7 +961,6 @@ def __init__(self,
self.service = service
self.catalog = catalog
self.filters = filters
self.file_url_func = service.file_url_func

manifest_namespace = UUID('ca1df635-b42c-4671-9322-b0a7209f0235')

Expand Down Expand Up @@ -1138,25 +1134,6 @@ def _get_entities(self, field_path: FieldPath, doc: JSON) -> JSONs:
assert json_elements_are_mappings(entities)
return entities

# Return the Azul URL of `file` as a string, or None when the file's
# 'drs_uri' entry is None.
#
# `file` must contain the keys 'drs_uri' and 'version', plus the key named by
# the metadata plugin's `special_fields.file_uuid.name_in_hit`. `args` is
# expanded into additional keyword arguments for `self.file_url_func`, which
# is always called with `fetch=False`.
def _azul_file_url(self,
file: JSON,
args: Mapping = frozendict()
) -> str | None:
if file['drs_uri'] is None:
# To download a file we need its DRS URI
return None
else:
special_fields = self.metadata_plugin.special_fields
return str(self.file_url_func(catalog=self.catalog,
file_uuid=json_str(file[special_fields.file_uuid.name_in_hit]),
version=json_str(file['version']),
fetch=False,
**args))

# Return the mirror URI of `file` from `source`, as computed by
# `self.mirror_service.mirror_uri` (annotated as `str | None`). The file class
# passed along is taken from `self.metadata_plugin`.
def _azul_mirror_uri(self, source: SourceRef, file: JSON) -> str | None:
file_cls = self.metadata_plugin.file_class
return self.mirror_service.mirror_uri(source, file_cls, file)

@cache
def _content_hash(self, *, by_bundle: bool) -> str:
"""
Expand Down Expand Up @@ -1538,7 +1515,7 @@ def _write(file: JSON, is_related_file: bool = False):
} if is_related_file else {
}

file_url = self._azul_file_url(file, args)
file_url = self.service.azul_file_url(file, args)
if file_url is None:
output.write(f"# File {file[file_uuid_field]!r}, version {file['version']!r} "
f"is currently not available in catalog {self.catalog!r}.\n\n")
Expand Down Expand Up @@ -1759,9 +1736,9 @@ def write_page_to(self,
if field_path == ('contents', 'files'):
file = copy_json(one(entities))
if 'file_url' in column_mapping:
file['file_url'] = self._azul_file_url(file)
file['file_url'] = self.service.azul_file_url(file)
if 'file_mirror_uri' in column_mapping:
file['file_mirror_uri'] = self._azul_mirror_uri(source, file)
file['file_mirror_uri'] = self.service.azul_mirror_uri(source, file)
entities = [file]
self._extract_fields(field_path=field_path,
entities=entities,
Expand All @@ -1775,9 +1752,9 @@ def write_page_to(self,
related_row: Cells = {}
file.update(related_file)
if 'file_url' in column_mapping:
file['file_url'] = self._azul_file_url(file)
file['file_url'] = self.service.azul_file_url(file)
if 'file_mirror_uri' in column_mapping:
file['file_mirror_uri'] = self._azul_mirror_uri(source, file)
file['file_mirror_uri'] = self.service.azul_mirror_uri(source, file)
self._extract_fields(field_path=field_path,
entities=[file],
column_mapping=column_mapping,
Expand Down
2 changes: 1 addition & 1 deletion src/azul/service/repository_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _repository_service(self) -> RepositoryService:

@cached_property
def _index_service(self) -> IndexService:
return IndexService()
return IndexService(file_url_func=self._file_url)

# Return the mirror service for `catalog` by delegating to the controller's
# index service, so the controller does not create one itself.
def _mirror_service(self, catalog: CatalogName) -> MirrorService:
return self._index_service.mirror_service(catalog)
Expand Down
3 changes: 1 addition & 2 deletions test/service/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,7 @@ def _service_index_service(self) -> IndexService:
def _response_stage(self, entity_type: str) -> HCASearchResponseStage:
return HCASearchResponseStage(service=self._service_index_service,
entity_type=entity_type,
catalog=self.catalog,
file_url_func=self.file_url_func)
catalog=self.catalog)

@property
def paginations(self):
Expand Down
Loading