Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

## [0.18.0] - 2026-06-15

### Added

- Entries are now searchable by the **human-readable name** of the Benchling objects they reference. `entry.json` gains a `links` array — one `{type, id, name, slug}` entry per referenced object — promoted into the package metadata so `links.name` queries match real names (e.g. find every experiment that references `QB-2743.1`). `name` is resolved from the Benchling API (best-effort `get_by_id`; `null` when the app lacks registry access or the type is unsupported); `slug` is a lossy token parsed from the `webURL` for display/debugging only and is never used as a name

### Changed

- `references.json` renamed to **`links.json`** and reduced to raw discovery facts only — `id`/`type`/`web_url` per link, plus `entities` and `results_tables`. Derived classifications (`category`/`fetchable`/`eventable`/`disposition`) are no longer persisted; they are recomputed from `type` in code at runtime, so the raw archive stays reprocessable and a future classification change needs no re-fetch. `schema_version` bumped to `2`

## [0.17.2] - 2026-04-15

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion docker/app-manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ manifestVersion: 1
info:
name: nightly-quilttest-com
description: Packaging Benchling Notebooks as Quilt packages
version: 0.17.2
version: 0.18.0
features:
- name: Quilt Connector
id: quilt_entry
Expand Down
2 changes: 1 addition & 1 deletion docker/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "benchling-quilt-integration"
version = "0.17.2"
version = "0.18.0"
description = "Benchling-Quilt Integration Webhook Service"
license = {text = "Apache-2.0"}
authors = [
Expand Down
78 changes: 77 additions & 1 deletion docker/src/entry_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,29 @@

from .auth import RoleManager
from .config import get_config
from .entry_references import link_metadata, summarize_references
from .payload import Payload
from .retry_utils import LAMBDA_INVOKE_RETRY, REST_API_RETRY

logger = structlog.get_logger(__name__)

# Maps an EntryLink ``type`` to the attribute on the Benchling SDK client whose
# service can fetch that object by id, which is how the authoritative
# human-readable name is obtained. Only types with a stable get-by-id lookup
# appear here (registry entities first, then inventory, then entries -- the
# types flagged on the 2026-06-15 call). A type that is not listed keeps
# ``name=None`` and relies on its slug alone. Because name resolution requires
# the app to be a registry/project collaborator, every lookup driven by this
# table is best-effort (see _enrich_link_names).
LINK_TYPE_TO_SERVICE: Dict[str, str] = {
    # Registry entities
    "custom_entity": "custom_entities",
    "dna_sequence": "dna_sequences",
    "aa_sequence": "aa_sequences",
    # Inventory
    "container": "containers",
    "box": "boxes",
    "plate": "plates",
    "location": "locations",
    # Notebook
    "entry": "entries",
}


class DateTimeEncoder(json.JSONEncoder):
"""Custom JSON encoder that converts datetime objects to ISO format strings."""
Expand Down Expand Up @@ -629,6 +647,13 @@ def _create_metadata_files(
if canvas_id is not None:
entry_json["canvas_id"] = canvas_id

# links - curated, searchable view of the objects this entry references.
# Promoted into entry.json (the package's metadata_uri) so `links.name` is
# queryable. Names are resolved best-effort against the Benchling API.
links = link_metadata(entry_data)
self._enrich_link_names(links)
entry_json["links"] = links

# input.json - Processing metadata
input_json = {
"source": "benchling_webhook",
Expand Down Expand Up @@ -684,13 +709,20 @@ def _create_metadata_files(
"""

for file_info in uploaded_files:
if file_info["filename"] not in ["entry.json", "entry_data.json", "input.json", "README.md"]:
if file_info["filename"] not in [
"entry.json",
"entry_data.json",
"input.json",
"links.json",
"README.md",
]:
readme_content += f"- `{file_info['filename']}` ({file_info['size']} bytes)\n"

readme_content += """
## Metadata Files
- `entry.json`: Key entry metadata (display_id, name, creator, authors, timestamps)
- `entry_data.json`: Complete entry data from Benchling API
- `links.json`: Raw Benchling objects this entry links to (entities, inventory, tables); the searchable, name-enriched summary is the `links` field of `entry.json`
- `input.json`: Export processing metadata
- `README.md`: This documentation file

Expand All @@ -699,13 +731,57 @@ def _create_metadata_files(
For questions about the data, refer to the original Benchling entry.
"""

# links.json - raw discovery of the entities/resources this entry points at,
# from the entry's note links and fields (no Benchling records are fetched
# here; inferences are not persisted). The searchable view is entry.json.links.
links_json = summarize_references(entry_data)

return {
"entry.json": entry_json,
"entry_data.json": entry_data,
"links.json": links_json,
"input.json": input_json,
"README.md": readme_content,
}

def _enrich_link_names(self, links: list[Dict[str, Any]]) -> None:
    """Fill each link's ``name`` with its authoritative Benchling display name.

    Mutates ``links`` in place (the curated entries from ``link_metadata``).
    Best-effort and never raises: a missing client, an unsupported type, or a
    forbidden/failed get-by-id leaves ``name`` untouched (``None``) -- the slug
    stays for eyeballing, and the slug is never promoted into ``name``. Name
    resolution needs the app to be a registry/project collaborator.

    Args:
        links: Link dicts carrying at least ``type`` and ``id``; each one whose
            name can be resolved gets its ``name`` key overwritten.
    """
    if not self.benchling:
        return

    # Statuses after which repeating the same GET cannot succeed. Skipping the
    # retry halves the API calls spent on links the app simply cannot read.
    no_retry_statuses = (401, 403, 404)

    for link in links:
        link_type = link.get("type")
        link_id = link.get("id")
        service_attr = LINK_TYPE_TO_SERVICE.get(link_type or "")
        if not service_attr or not link_id:
            continue

        service = getattr(self.benchling, service_attr, None)
        if service is None:
            continue

        # Most SDK services expose ``get_by_id``. The entries service is
        # understood to name its lookup ``get_entry_by_id`` instead, so fall
        # back to that rather than failing every "entry" link.
        # NOTE(review): confirm against the pinned benchling-sdk version.
        fetch = getattr(service, "get_by_id", None) or getattr(service, "get_entry_by_id", None)
        if not callable(fetch):
            self.logger.debug(
                "Link name lookup failed",
                link_id=link_id,
                link_type=link_type,
                error="service exposes no get-by-id method",
            )
            continue

        record = None
        try:
            # ``returning`` trims the payload to just the name where supported.
            record = fetch(link_id, returning=["name"])
        except Exception as exc:
            failure: Any = exc
            # Retry without ``returning`` only when that could plausibly change
            # the outcome (e.g. the method or endpoint rejects the parameter).
            # SDK errors are expected to carry ``status_code``; anything without
            # one still gets the plain retry, as before.
            if getattr(exc, "status_code", None) not in no_retry_statuses:
                try:
                    record = fetch(link_id)
                    failure = None
                except Exception as retry_exc:
                    failure = retry_exc
            if failure is not None:
                self.logger.debug(
                    "Link name lookup failed",
                    link_id=link_id,
                    link_type=link_type,
                    error=str(failure),
                )
                continue

        # Only a non-empty string counts as a name; anything else leaves the
        # existing value in place.
        record_name = getattr(record, "name", None)
        if isinstance(record_name, str) and record_name:
            link["name"] = record_name

def _load_existing_canvas_id_from_entry_json(self, s3_client: Any, package_name: str) -> Optional[str]:
"""Read canvas_id from a previously-written ``entry.json``, if any.

Expand Down
Loading