Skip to content

Commit 36e298b

Browse files
Zalfsten and Copilot committed
Enhance API client to support JSON strings for ARC submissions and improve error handling for invalid JSON inputs
Co-authored-by: Copilot <copilot@github.com>
1 parent 793924a commit 36e298b

3 files changed

Lines changed: 118 additions & 39 deletions

File tree

middleware/api_client/README.md

Lines changed: 9 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,13 @@ async def main():
5151
# Send a single ARC
5252
response = await client.create_or_update_arc(
5353
rdi="my-rdi",
54-
arc=arc,
54+
arc=arc, # Can be ARC object, dict, or JSON string
5555
)
5656
print(f"ARC status: {response.status}")
5757

5858
# Or run a harvest workflow
5959
async def arc_stream():
60-
yield arc
60+
yield arc # Can yield ARC objects, dicts, or JSON strings
6161

6262
harvest = await client.harvest_arcs(
6363
rdi="my-rdi",
@@ -85,22 +85,22 @@ asyncio.run(main())
8585

8686
## API Methods
8787

88-
### `create_or_update_arc(rdi: str, arc: ARC | dict) -> ArcResult`
88+
### `create_or_update_arc(rdi: str, arc: ARC | dict | str) -> ArcResult`
8989

9090
Create or update one ARC in the Middleware API.
9191

9292
**Parameters:**
9393

9494
- `rdi` (str): The RDI identifier (e.g., "edaphobase").
95-
- `arc` (ARC | dict): ARC object from arctrl or pre-serialised RO-Crate dict.
95+
- `arc` (ARC | dict | str): ARC object from arctrl, pre-serialised RO-Crate dict, or JSON string.
9696

9797
**Returns:**
9898

9999
- `ArcResult`: Contains the result of the operation.
100100

101101
**Raises:**
102102

103-
- `ApiClientError`: If the request fails due to HTTP errors or network issues.
103+
- `ApiClientError`: If the request fails due to HTTP errors, network issues, or invalid JSON.
104104

105105
**Example:**
106106

@@ -112,17 +112,18 @@ arc = ARC.from_arc_investigation(inv)
112112

113113
response = await client.create_or_update_arc(
114114
rdi="edaphobase",
115-
arc=arc,
115+
arc=arc, # Can also be dict or JSON string
116116
)
117117
```
118118

119-
### `harvest_arcs(rdi: str, arcs: AsyncIterator[ARC | dict], expected_datasets: int | None = None) -> HarvestResult`
119+
### `harvest_arcs(rdi: str, arcs: AsyncIterator[ARC | dict | str], expected_datasets: int | None = None) -> HarvestResult`
120120

121121
Convenience workflow to create a harvest, upload all ARCs from an async iterator, and complete the harvest.
122122

123123
- Uses `config.max_concurrency` by default.
124124
- Continues on item-level submission errors and skips failed items.
125125
- Cancels the harvest only for catastrophic errors.
126+
- Supports ARC objects, pre-serialised RO-Crate dicts, and JSON strings.
126127

127128
All errors are raised as `ApiClientError` exceptions:
128129

@@ -132,24 +133,7 @@ from middleware.api_client import ApiClientError
132133
try:
133134
response = await client.create_or_update_arc(
134135
rdi="my-rdi",
135-
arc=arc,
136+
arc=arc, # Can be ARC object, dict, or JSON string
136137
)
137138
except ApiClientError as e:
138139
print(f"API Error: {e}")
139-
```
140-
141-
## Configuration via Environment Variables
142-
143-
You can override configuration values using environment variables:
144-
145-
```bash
146-
export API_URL="https://production-api:8000"
147-
export CLIENT_CERT_PATH="/secure/certs/prod-cert.pem"
148-
export CLIENT_KEY_PATH="/secure/certs/prod-key.pem"
149-
```
150-
151-
Or use Docker secrets in `/run/secrets/`.
152-
153-
## License
154-
155-
This is part of the FAIRagro Advanced Middleware project.

middleware/api_client/src/middleware/api_client/api_client.py

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -262,13 +262,13 @@ def _process_completed_arc_tasks(
262262
async def _submit_arcs_parallel(
263263
self,
264264
harvest_id: str,
265-
arcs: "AsyncGenerator[ARC | dict[str, Any], None] | AsyncIterator[ARC | dict[str, Any]]",
265+
arcs: "AsyncGenerator[ARC | dict[str, Any] | str, None] | AsyncIterator[ARC | dict[str, Any] | str]",
266266
) -> int:
267267
"""Submit all ARCs in bounded parallelism and return number of skipped ARC submissions."""
268268
pending_tasks: set[asyncio.Task[None]] = set()
269269
failed_submissions = 0
270270

271-
async def submit_one(arc_item: "ARC | dict[str, Any]") -> None:
271+
async def submit_one(arc_item: "ARC | dict[str, Any] | str") -> None:
272272
await self.submit_arc_in_harvest(harvest_id, arc_item)
273273

274274
async for arc in arcs:
@@ -446,10 +446,15 @@ async def _delete(self, path: str) -> None:
446446
# ------------------------------------------------------------------
447447

448448
@classmethod
449-
def _serialize_arc(cls, arc: "ARC | dict[str, Any]") -> dict[str, Any]:
450-
"""Serialize an ARC object to a plain RO-Crate JSON dict."""
449+
def _serialize_arc(cls, arc: "ARC | dict[str, Any] | str") -> dict[str, Any]:
450+
"""Serialize an ARC object, dict, or JSON string to a plain RO-Crate JSON dict."""
451451
if isinstance(arc, dict):
452452
return arc
453+
if isinstance(arc, str):
454+
try:
455+
return cast(dict[str, Any], json.loads(arc))
456+
except json.JSONDecodeError as e:
457+
raise ApiClientError(f"Invalid JSON string provided for ARC: {e}") from e
453458
return cast(dict[str, Any], json.loads(arc.ToROCrateJsonString()))
454459

455460
@classmethod
@@ -473,7 +478,7 @@ def _parse_harvest_response(cls, data: Any) -> HarvestResult:
473478
async def create_or_update_arc(
474479
self,
475480
rdi: str,
476-
arc: "ARC | dict[str, Any]",
481+
arc: "ARC | dict[str, Any] | str",
477482
) -> ArcResult:
478483
"""Create or update an ARC.
479484
@@ -483,7 +488,7 @@ async def create_or_update_arc(
483488
484489
Args:
485490
rdi: RDI identifier.
486-
arc: ARC object or a pre-serialised RO-Crate JSON dict.
491+
arc: ARC object, a pre-serialised RO-Crate JSON dict, or a JSON string.
487492
488493
Returns:
489494
:class:`ArcResult` with the result of the operation.
@@ -579,7 +584,7 @@ async def cancel_harvest(self, harvest_id: str) -> None:
579584
async def submit_arc_in_harvest(
580585
self,
581586
harvest_id: str,
582-
arc: "ARC | dict[str, Any]",
587+
arc: "ARC | dict[str, Any] | str",
583588
) -> ArcResult:
584589
"""Submit an ARC within an active harvest run.
585590
@@ -588,7 +593,7 @@ async def submit_arc_in_harvest(
588593
589594
Args:
590595
harvest_id: Harvest identifier.
591-
arc: ARC object or a pre-serialised RO-Crate JSON dict.
596+
arc: ARC object, a pre-serialised RO-Crate JSON dict, or a JSON string.
592597
593598
Returns:
594599
:class:`ArcResult` with the result of the operation.
@@ -601,7 +606,7 @@ async def submit_arc_in_harvest(
601606
async def harvest_arcs(
602607
self,
603608
rdi: str,
604-
arcs: "AsyncGenerator[ARC | dict[str, Any], None] | AsyncIterator[ARC | dict[str, Any]]",
609+
arcs: "AsyncGenerator[ARC | dict[str, Any] | str, None] | AsyncIterator[ARC | dict[str, Any] | str]",
605610
expected_datasets: int | None = None,
606611
) -> HarvestResult:
607612
"""Create a harvest, upload all ARCs from an async generator, then complete it.
@@ -618,8 +623,8 @@ async def harvest_arcs(
618623
619624
Args:
620625
rdi: RDI identifier for the harvest.
621-
arcs: Async generator or async iterator yielding ARC objects or
622-
pre-serialised RO-Crate dicts.
626+
arcs: Async generator or async iterator yielding ARC objects,
627+
pre-serialised RO-Crate dicts, or JSON strings.
623628
expected_datasets: Optional hint about the total number of ARCs.
624629
625630
Returns:
@@ -631,7 +636,7 @@ async def harvest_arcs(
631636
632637
Example::
633638
634-
async def my_arcs() -> AsyncGenerator[dict, None]:
639+
async def my_arcs() -> AsyncGenerator[dict | str, None]:
635640
for arc in source:
636641
yield arc
637642

middleware/api_client/tests/unit/test_client.py

Lines changed: 92 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,28 @@ async def test_create_or_update_arc_with_dict(client_config: Config) -> None:
226226
assert isinstance(response, ArcResult)
227227

228228

229+
@pytest.mark.asyncio
230+
@respx.mock
231+
async def test_create_or_update_arc_with_json_string(client_config: Config) -> None:
232+
"""Test create_or_update_arc with a JSON string."""
233+
route = respx.post(f"{client_config.api_url}v3/arcs").mock(
234+
return_value=httpx.Response(http.HTTPStatus.OK, json=_ARC_RESPONSE)
235+
)
236+
async with ApiClient(client_config) as client:
237+
response = await client.create_or_update_arc(rdi="test-rdi", arc='{"id": "mock-arc"}')
238+
assert route.called
239+
assert isinstance(response, ArcResult)
240+
assert response.arc_id == "arc-123"
241+
242+
243+
@pytest.mark.asyncio
244+
async def test_create_or_update_arc_with_invalid_json_string(client_config: Config) -> None:
245+
"""Test create_or_update_arc with an invalid JSON string."""
246+
async with ApiClient(client_config) as client:
247+
with pytest.raises(ApiClientError, match="Invalid JSON string provided for ARC"):
248+
await client.create_or_update_arc(rdi="test-rdi", arc='{"id": "mock-arc"')
249+
250+
229251
@pytest.mark.asyncio
230252
@respx.mock
231253
async def test_create_or_update_arc_http_error(client_config: Config) -> None:
@@ -495,13 +517,35 @@ async def test_submit_arc_in_harvest_invalid_response(client_config: Config) ->
495517
await client.submit_arc_in_harvest("harvest-456", arc={"id": "mock"})
496518

497519

520+
@pytest.mark.asyncio
521+
@respx.mock
522+
async def test_submit_arc_in_harvest_with_json_string(client_config: Config) -> None:
523+
"""Test submit_arc_in_harvest with a JSON string."""
524+
route = respx.post(f"{client_config.api_url}v3/harvests/harvest-456/arcs").mock(
525+
return_value=httpx.Response(http.HTTPStatus.OK, json=_ARC_RESPONSE)
526+
)
527+
async with ApiClient(client_config) as client:
528+
response = await client.submit_arc_in_harvest("harvest-456", arc='{"id": "mock-arc"}')
529+
assert route.called
530+
assert isinstance(response, ArcResult)
531+
assert response.arc_id == "arc-123"
532+
533+
534+
@pytest.mark.asyncio
535+
async def test_submit_arc_in_harvest_with_invalid_json_string(client_config: Config) -> None:
536+
"""Test submit_arc_in_harvest with an invalid JSON string."""
537+
async with ApiClient(client_config) as client:
538+
with pytest.raises(ApiClientError, match="Invalid JSON string provided for ARC"):
539+
await client.submit_arc_in_harvest("harvest-456", arc='{"id": "mock-arc"')
540+
541+
498542
# ---------------------------------------------------------------------------
499543
# harvest_arcs
500544
# ---------------------------------------------------------------------------
501545

502546

503-
async def _arc_gen(*arcs: "dict[str, Any]") -> AsyncGenerator["dict[str, Any]", None]:
504-
"""Yield the provided arc dicts as an async generator."""
547+
async def _arc_gen(*arcs: "dict[str, Any] | str | ARC") -> AsyncGenerator["dict[str, Any] | str | ARC", None]:
548+
"""Yield the provided arc dicts, JSON strings, or ARC objects as an async generator."""
505549
for arc in arcs:
506550
yield arc
507551

@@ -650,6 +694,52 @@ async def test_harvest_arcs_cancels_on_catastrophic_error(client_config: Config)
650694
assert cancel_route.called
651695

652696

697+
@pytest.mark.asyncio
698+
@respx.mock
699+
async def test_harvest_arcs_with_json_string(client_config: Config) -> None:
700+
"""harvest_arcs supports JSON strings in async generator."""
701+
completed_response = {**_HARVEST_RESPONSE, "status": "COMPLETED", "completed_at": "2024-01-01T01:00:00Z"}
702+
respx.post(f"{client_config.api_url}v3/harvests").mock(
703+
return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
704+
)
705+
respx.post(f"{client_config.api_url}v3/harvests/harvest-456/arcs").mock(
706+
return_value=httpx.Response(http.HTTPStatus.OK, json=_ARC_RESPONSE)
707+
)
708+
respx.post(f"{client_config.api_url}v3/harvests/harvest-456/complete").mock(
709+
return_value=httpx.Response(http.HTTPStatus.OK, json=completed_response)
710+
)
711+
712+
arcs = _arc_gen(
713+
'{"id": "arc-1-string"}',
714+
{"id": "arc-2-dict"},
715+
ARC.from_arc_investigation(ArcInvestigation.create(identifier="test", title="Test")),
716+
)
717+
async with ApiClient(client_config) as client:
718+
result = await client.harvest_arcs("test-rdi", arcs, expected_datasets=3)
719+
720+
assert isinstance(result, HarvestResult)
721+
assert result.status == "COMPLETED"
722+
723+
724+
@pytest.mark.asyncio
725+
@respx.mock
726+
async def test_harvest_arcs_with_invalid_json_string(client_config: Config) -> None:
727+
"""harvest_arcs raises ApiClientError when JSON string is invalid."""
728+
# Mock the harvest creation endpoint to prevent actual HTTP requests
729+
respx.post(f"{client_config.api_url}v3/harvests").mock(
730+
return_value=httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE)
731+
)
732+
# Mock the harvest cancellation endpoint
733+
respx.delete(f"{client_config.api_url}v3/harvests/harvest-456").mock(
734+
return_value=httpx.Response(http.HTTPStatus.NO_CONTENT)
735+
)
736+
737+
async with ApiClient(client_config) as client:
738+
arcs = _arc_gen('{"id": "arc-1"') # Single invalid JSON string
739+
with pytest.raises(ApiClientError, match="Invalid JSON string provided for ARC"):
740+
await client.harvest_arcs("test-rdi", arcs)
741+
742+
653743
@pytest.mark.asyncio
654744
@respx.mock
655745
async def test_harvest_arcs_cancel_failure_does_not_mask_original_error(client_config: Config) -> None:

0 commit comments

Comments
 (0)