From 778963c3bdf7b91448692a4103ef9143e06f786a Mon Sep 17 00:00:00 2001
From: Fran McDade <18710366+frano-m@users.noreply.github.com>
Date: Wed, 13 May 2026 22:27:46 +1000
Subject: [PATCH 1/3] feat: [lungmap] add lungmap projects to google datasets
 catalog (#4808)

---
 .../schemaOrg/lungmapProjectDataset.test.ts   |  74 ++++++
 app/utils/schemaOrg/hcaProjectDataset.ts      | 200 +---------------
 app/utils/schemaOrg/lungmapProjectDataset.ts  |  27 +++
 app/utils/schemaOrg/projectDataset.ts         | 224 ++++++++++++++++++
 pages/[entityListType]/[...params].tsx        |   4 +
 5 files changed, 337 insertions(+), 192 deletions(-)
 create mode 100644 __tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
 create mode 100644 app/utils/schemaOrg/lungmapProjectDataset.ts
 create mode 100644 app/utils/schemaOrg/projectDataset.ts

diff --git a/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
new file mode 100644
index 000000000..67688c89b
--- /dev/null
+++ b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
@@ -0,0 +1,74 @@
+import type { ProjectsResponse } from "../../../app/apis/azul/hca-dcp/common/responses";
+import { buildLungmapProjectJsonLd } from "../../../app/utils/schemaOrg/lungmapProjectDataset";
+
+const BROWSER_URL = "https://data-browser.lungmap.net";
+
+/**
+ * Builds a minimal valid project response for the LungMAP wrapper. The full
+ * mapping is covered by `hcaProjectDataset.test.ts` (same shared core); this
+ * file only verifies the LungMAP-specific catalog identity surfaces correctly.
+ * @returns A `ProjectsResponse` shape sufficient for catalog-identity checks.
+ */
+function makeProjectsResponse(): ProjectsResponse {
+  return {
+    dates: [],
+    donorOrganisms: [],
+    entryId: "abc",
+    fileTypeSummaries: [],
+    projects: [
+      {
+        accessible: true,
+        accessions: [],
+        bionetworkName: [],
+        contributedAnalyses: {},
+        contributors: [],
+        dataUseRestriction: null,
+        duosId: null,
+        estimatedCellCount: null,
+        laboratory: [],
+        matrices: {},
+        projectDescription:
+          "A study of lung development and disease across many donors.",
+        projectId: "uuid-1",
+        projectShortname: "Lung Study",
+        projectTitle: "Lung development atlas",
+      },
+    ],
+    protocols: [],
+    samples: [],
+    specimens: [],
+    status: 200,
+  } as unknown as ProjectsResponse;
+}
+
+describe("buildLungmapProjectJsonLd", () => {
+  it("returns undefined when no project is present", () => {
+    const response = { ...makeProjectsResponse(), projects: [] };
+    expect(
+      buildLungmapProjectJsonLd(response as ProjectsResponse, BROWSER_URL)
+    ).toBeUndefined();
+  });
+
+  it("surfaces LungMAP as the catalog identity and uses the projects URL pattern", () => {
+    const result = buildLungmapProjectJsonLd(
+      makeProjectsResponse(),
+      BROWSER_URL
+    );
+    expect(result).toBeDefined();
+    expect(result!.includedInDataCatalog).toEqual({
+      "@type": "DataCatalog",
+      name: "LungMAP Data Explorer",
+      url: BROWSER_URL,
+    });
+    expect(result!.url).toBe(`${BROWSER_URL}/projects/uuid-1`);
+  });
+
+  it("pads short descriptions with the LungMAP catalog suffix", () => {
+    const response = makeProjectsResponse();
+    response.projects[0].projectDescription = "Short.";
+    const result = buildLungmapProjectJsonLd(response, BROWSER_URL);
+    expect(result!.description).toBe(
+      "Lung development atlas — Short. — LungMAP Data Explorer project."
+    );
+  });
+});
diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts
index 20b9ed879..aa2bcd084 100644
--- a/app/utils/schemaOrg/hcaProjectDataset.ts
+++ b/app/utils/schemaOrg/hcaProjectDataset.ts
@@ -1,81 +1,17 @@
-import type {
-  AccessionResponse,
-  ContributorResponse,
-  PublicationResponse,
-} from "../../apis/azul/hca-dcp/common/entities";
 import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
-import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper";
-import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants";
-import { MAX_KEYWORDS } from "./constants";
-import type {
-  SchemaDataset,
-  SchemaOrganization,
-  SchemaPerson,
-  SchemaScholarlyArticle,
-} from "./types";
-import { buildDescription, uniqueNonEmpty } from "./utils";
+import type { ProjectCatalogOptions } from "./projectDataset";
+import { buildProjectJsonLd } from "./projectDataset";
+import type { SchemaDataset } from "./types";
 
 const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform";
-const DESCRIPTION_FALLBACK_SUFFIX = `${CATALOG_NAME} project.`;
 
-/**
- * Builds the citation array from project publications. Skips entries without a
- * title. Prefers DOI for `sameAs`, falling back to the publication URL.
- * @param publications - HCA project publications.
- * @returns Array of schema.org ScholarlyArticle objects.
- */
-function buildCitations(
-  publications: PublicationResponse[]
-): SchemaScholarlyArticle[] {
-  const citations: SchemaScholarlyArticle[] = [];
-  for (const publication of publications ?? []) {
-    if (!publication.publicationTitle) continue;
-    const article: SchemaScholarlyArticle = {
-      "@type": "ScholarlyArticle",
-      headline: publication.publicationTitle,
-      name: publication.publicationTitle,
-    };
-    if (publication.doi) {
-      article.sameAs = `https://doi.org/${publication.doi}`;
-    } else if (publication.publicationUrl) {
-      article.sameAs = publication.publicationUrl;
-    }
-    citations.push(article);
-  }
-  return citations;
-}
-
-/**
- * Builds the creator array from project contributors. Skips entries without a
- * name. When the contributor has an institution, attaches it as an affiliation.
- * @param contributors - HCA project contributors.
- * @returns Array of schema.org Person objects.
- */
-function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] {
-  const creators: SchemaPerson[] = [];
-  for (const contributor of contributors ?? []) {
-    if (!contributor.contactName) continue;
-    const person: SchemaPerson = {
-      "@type": "Person",
-      name: normaliseContactName(contributor.contactName),
-    };
-    if (contributor.institution) {
-      const affiliation: SchemaOrganization = {
-        "@type": "Organization",
-        name: contributor.institution,
-      };
-      person.affiliation = affiliation;
-    }
-    creators.push(person);
-  }
-  return creators;
-}
+const OPTIONS: ProjectCatalogOptions = {
+  catalogName: CATALOG_NAME,
+  descriptionFallbackSuffix: `${CATALOG_NAME} project.`,
+};
 
 /**
  * Builds a Schema.org Dataset JSON-LD object for an HCA DCP project.
- *
- * Returns `undefined` when the response does not carry a project we can
- * describe (i.e. no project entity), so the caller can skip rendering.
  * @param data - HCA DCP project detail response from Azul.
  * @param browserURL - Site base URL used for canonical and catalog URLs.
  * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
@@ -84,125 +20,5 @@ export function buildHcaProjectJsonLd(
   data: ProjectsResponse,
   browserURL: string
 ): SchemaDataset | undefined {
-  const project = data.projects?.[0];
-  if (!project) return undefined;
-
-  const name = project.projectTitle || project.projectShortname;
-  const description = buildDescription(
-    project.projectDescription,
-    name,
-    DESCRIPTION_FALLBACK_SUFFIX
-  );
-  const identifier = uniqueNonEmpty([
-    project.projectId,
-    ...project.accessions.flatMap((accession) =>
-      splitAccessionIds(accession.accession)
-    ),
-  ]);
-
-  const jsonLd: SchemaDataset = {
-    "@context": "https://schema.org",
-    "@type": "Dataset",
-    description,
-    identifier,
-    includedInDataCatalog: {
-      "@type": "DataCatalog",
-      name: CATALOG_NAME,
-      url: browserURL,
-    },
-    isAccessibleForFree: true,
-    name,
-    url: `${browserURL}/projects/${project.projectId}`,
-  };
-
-  const sameAs = buildSameAs(project.accessions);
-  if (sameAs.length > 0) jsonLd.sameAs = sameAs;
-
-  const keywords = buildKeywords(data);
-  if (keywords.length > 0) jsonLd.keywords = keywords;
-
-  const creator = buildCreators(project.contributors);
-  if (creator.length > 0) jsonLd.creator = creator;
-
-  const citation = buildCitations(project.publications);
-  if (citation.length > 0) jsonLd.citation = citation;
-
-  return jsonLd;
-}
-
-/**
- * Builds a keywords array by unioning biologically-meaningful fields from the
- * project's aggregated donor/sample/specimen/protocol responses.
- * @param data - HCA project detail response.
- * @returns Deduplicated keywords array.
- */
-function buildKeywords(data: ProjectsResponse): string[] {
-  const values: (string | null | undefined)[] = [];
-  for (const donor of data.donorOrganisms ?? []) {
-    values.push(...(donor.genusSpecies ?? []));
-    values.push(...(donor.disease ?? []));
-  }
-  for (const sample of data.samples ?? []) {
-    values.push(...(sample.organ ?? []));
-    values.push(...(sample.organPart ?? []));
-    values.push(...(sample.disease ?? []));
-    values.push(...(sample.sampleEntityType ?? []));
-  }
-  for (const specimen of data.specimens ?? []) {
-    values.push(...(specimen.organ ?? []));
-    values.push(...(specimen.organPart ?? []));
-    values.push(...(specimen.disease ?? []));
-  }
-  for (const protocol of data.protocols ?? []) {
-    values.push(...(protocol.libraryConstructionApproach ?? []));
-    values.push(...(protocol.instrumentManufacturerModel ?? []));
-  }
-  return uniqueNonEmpty(values).slice(0, MAX_KEYWORDS);
-}
-
-/**
- * Builds the sameAs array of external accession URLs via identifiers.org.
- * Only includes accessions whose namespace maps to a known identifier prefix.
- * @param accessions - Project accessions from the Azul response.
- * @returns Array of canonical accession URLs.
- */
-function buildSameAs(accessions: AccessionResponse[]): string[] {
-  const urls: string[] = [];
-  for (const { accession, namespace } of accessions) {
-    const prefix =
-      ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix;
-    if (!prefix) continue;
-    for (const id of splitAccessionIds(accession)) {
-      const url = transformAccessionURL(id, prefix);
-      if (url) urls.push(url);
-    }
-  }
-  return uniqueNonEmpty(urls);
-}
-
-/**
- * Normalises an HCA contributor's contactName from "Last,First,Middle" to
- * "First Middle Last" for use as a Schema.org Person.name value.
- * @param contactName - Raw contactName from the Azul response.
- * @returns Human-readable contributor name.
- */
-function normaliseContactName(contactName: string): string {
-  const parts = contactName.split(",").map((part) => part.trim());
-  if (parts.length < 2) return contactName;
-  const [last, ...rest] = parts;
-  return [...rest, last].filter(Boolean).join(" ");
-}
-
-/**
- * Splits an Azul accession string into individual accession IDs. Azul returns
- * accessions as a semicolon-separated string when a project carries multiple
- * IDs under the same namespace (mirrors the split done by `mapAccessions`).
- * @param accession - Raw accession value from the Azul response.
- * @returns Trimmed, non-empty accession IDs.
- */
-function splitAccessionIds(accession: string): string[] {
-  return accession
-    .split(";")
-    .map((id) => id.trim())
-    .filter(Boolean);
+  return buildProjectJsonLd(data, browserURL, OPTIONS);
 }
diff --git a/app/utils/schemaOrg/lungmapProjectDataset.ts b/app/utils/schemaOrg/lungmapProjectDataset.ts
new file mode 100644
index 000000000..d07cba9d3
--- /dev/null
+++ b/app/utils/schemaOrg/lungmapProjectDataset.ts
@@ -0,0 +1,27 @@
+import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
+import type { ProjectCatalogOptions } from "./projectDataset";
+import { buildProjectJsonLd } from "./projectDataset";
+import type { SchemaDataset } from "./types";
+
+const CATALOG_NAME = "LungMAP Data Explorer";
+
+const OPTIONS: ProjectCatalogOptions = {
+  catalogName: CATALOG_NAME,
+  descriptionFallbackSuffix: `${CATALOG_NAME} project.`,
+};
+
+/**
+ * Builds a Schema.org Dataset JSON-LD object for a LungMAP project. LungMAP
+ * shares the HCA Azul backend, so the response shape matches HCA's
+ * `ProjectsResponse` and the shared `buildProjectJsonLd` core does the
+ * mapping; this wrapper just supplies LungMAP-specific catalog identity.
+ * @param data - LungMAP project detail response from Azul.
+ * @param browserURL - Site base URL used for canonical and catalog URLs.
+ * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
+ */
+export function buildLungmapProjectJsonLd(
+  data: ProjectsResponse,
+  browserURL: string
+): SchemaDataset | undefined {
+  return buildProjectJsonLd(data, browserURL, OPTIONS);
+}
diff --git a/app/utils/schemaOrg/projectDataset.ts b/app/utils/schemaOrg/projectDataset.ts
new file mode 100644
index 000000000..8703a298e
--- /dev/null
+++ b/app/utils/schemaOrg/projectDataset.ts
@@ -0,0 +1,224 @@
+/**
+ * Shared Schema.org Dataset builder for consumers that surface HCA-style
+ * `ProjectsResponse` data (HCA DCP, LungMAP). Per-consumer files (e.g.
+ * `hcaProjectDataset.ts`, `lungmapProjectDataset.ts`) supply a
+ * `ProjectCatalogOptions` describing catalog identity and call
+ * `buildProjectJsonLd` to produce the JSON-LD payload.
+ */
+
+import type {
+  AccessionResponse,
+  ContributorResponse,
+  PublicationResponse,
+} from "../../apis/azul/hca-dcp/common/entities";
+import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
+import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper";
+import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants";
+import type {
+  SchemaDataset,
+  SchemaOrganization,
+  SchemaPerson,
+  SchemaScholarlyArticle,
+} from "./types";
+import { buildDescription, uniqueNonEmpty } from "./utils";
+
+/**
+ * Per-consumer catalog identity used to populate `includedInDataCatalog` and
+ * the description-padding fallback. Callers (e.g. HCA, LungMAP) supply this
+ * via thin wrappers so the shared builder stays consumer-agnostic.
+ */
+export interface ProjectCatalogOptions {
+  catalogName: string;
+  descriptionFallbackSuffix: string;
+}
+
+/**
+ * Builds the citation array from project publications. Skips entries without a
+ * title. Prefers DOI for `sameAs`, falling back to the publication URL.
+ * @param publications - Project publications.
+ * @returns Array of schema.org ScholarlyArticle objects.
+ */
+function buildCitations(
+  publications: PublicationResponse[]
+): SchemaScholarlyArticle[] {
+  const citations: SchemaScholarlyArticle[] = [];
+  for (const publication of publications ?? []) {
+    if (!publication.publicationTitle) continue;
+    const article: SchemaScholarlyArticle = {
+      "@type": "ScholarlyArticle",
+      headline: publication.publicationTitle,
+      name: publication.publicationTitle,
+    };
+    if (publication.doi) {
+      article.sameAs = `https://doi.org/${publication.doi}`;
+    } else if (publication.publicationUrl) {
+      article.sameAs = publication.publicationUrl;
+    }
+    citations.push(article);
+  }
+  return citations;
+}
+
+/**
+ * Builds the creator array from project contributors. Skips entries without a
+ * name. When the contributor has an institution, attaches it as an affiliation.
+ * @param contributors - Project contributors.
+ * @returns Array of schema.org Person objects.
+ */
+function buildCreators(contributors: ContributorResponse[]): SchemaPerson[] {
+  const creators: SchemaPerson[] = [];
+  for (const contributor of contributors ?? []) {
+    if (!contributor.contactName) continue;
+    const person: SchemaPerson = {
+      "@type": "Person",
+      name: normaliseContactName(contributor.contactName),
+    };
+    if (contributor.institution) {
+      const affiliation: SchemaOrganization = {
+        "@type": "Organization",
+        name: contributor.institution,
+      };
+      person.affiliation = affiliation;
+    }
+    creators.push(person);
+  }
+  return creators;
+}
+
+/**
+ * Builds a keywords array by unioning biologically-meaningful fields from the
+ * project's aggregated donor/sample/specimen/protocol responses.
+ * @param data - Project detail response.
+ * @returns Deduplicated keywords array.
+ */
+function buildKeywords(data: ProjectsResponse): string[] {
+  const values: (string | null | undefined)[] = [];
+  for (const donor of data.donorOrganisms ?? []) {
+    values.push(...(donor.genusSpecies ?? []));
+    values.push(...(donor.disease ?? []));
+  }
+  for (const sample of data.samples ?? []) {
+    values.push(...(sample.organ ?? []));
+    values.push(...(sample.organPart ?? []));
+    values.push(...(sample.disease ?? []));
+    values.push(...(sample.sampleEntityType ?? []));
+  }
+  for (const specimen of data.specimens ?? []) {
+    values.push(...(specimen.organ ?? []));
+    values.push(...(specimen.organPart ?? []));
+    values.push(...(specimen.disease ?? []));
+  }
+  for (const protocol of data.protocols ?? []) {
+    values.push(...(protocol.libraryConstructionApproach ?? []));
+    values.push(...(protocol.instrumentManufacturerModel ?? []));
+  }
+  return uniqueNonEmpty(values);
+}
+
+/**
+ * Builds a Schema.org Dataset JSON-LD object from a project detail response.
+ *
+ * Returns `undefined` when the response does not carry a project we can
+ * describe, so the caller can skip rendering.
+ * @param data - Project detail response from Azul.
+ * @param browserURL - Site base URL used for canonical and catalog URLs.
+ * @param options - Consumer-specific catalog identity.
+ * @returns Schema.org Dataset JSON-LD object, or `undefined` if not buildable.
+ */
+export function buildProjectJsonLd(
+  data: ProjectsResponse,
+  browserURL: string,
+  options: ProjectCatalogOptions
+): SchemaDataset | undefined {
+  const project = data.projects?.[0];
+  if (!project) return undefined;
+
+  const name = project.projectTitle || project.projectShortname;
+  const description = buildDescription(
+    project.projectDescription,
+    name,
+    options.descriptionFallbackSuffix
+  );
+  const identifier = uniqueNonEmpty([
+    project.projectId,
+    ...project.accessions.flatMap((accession) =>
+      splitAccessionIds(accession.accession)
+    ),
+  ]);
+
+  const jsonLd: SchemaDataset = {
+    "@context": "https://schema.org",
+    "@type": "Dataset",
+    description,
+    identifier,
+    includedInDataCatalog: {
+      "@type": "DataCatalog",
+      name: options.catalogName,
+      url: browserURL,
+    },
+    isAccessibleForFree: true,
+    name,
+    url: `${browserURL}/projects/${project.projectId}`,
+  };
+
+  const sameAs = buildSameAs(project.accessions);
+  if (sameAs.length > 0) jsonLd.sameAs = sameAs;
+
+  const keywords = buildKeywords(data);
+  if (keywords.length > 0) jsonLd.keywords = keywords;
+
+  const creator = buildCreators(project.contributors);
+  if (creator.length > 0) jsonLd.creator = creator;
+
+  const citation = buildCitations(project.publications);
+  if (citation.length > 0) jsonLd.citation = citation;
+
+  return jsonLd;
+}
+
+/**
+ * Builds the sameAs array of external accession URLs via identifiers.org.
+ * Only includes accessions whose namespace maps to a known identifier prefix.
+ * @param accessions - Project accessions from the Azul response.
+ * @returns Array of canonical accession URLs.
+ */
+function buildSameAs(accessions: AccessionResponse[]): string[] {
+  const urls: string[] = [];
+  for (const { accession, namespace } of accessions) {
+    const prefix =
+      ACCESSION_CONFIGS_BY_RESPONSE_KEY.get(namespace)?.identifierOrgPrefix;
+    if (!prefix) continue;
+    for (const id of splitAccessionIds(accession)) {
+      const url = transformAccessionURL(id, prefix);
+      if (url) urls.push(url);
+    }
+  }
+  return uniqueNonEmpty(urls);
+}
+
+/**
+ * Normalises an Azul contributor's contactName from "Last,First,Middle" to
+ * "First Middle Last" for use as a Schema.org Person.name value.
+ * @param contactName - Raw contactName from the Azul response.
+ * @returns Human-readable contributor name.
+ */
+function normaliseContactName(contactName: string): string {
+  const parts = contactName.split(",").map((part) => part.trim());
+  if (parts.length < 2) return contactName;
+  const [last, ...rest] = parts;
+  return [...rest, last].filter(Boolean).join(" ");
+}
+
+/**
+ * Splits an Azul accession string into individual accession IDs. Azul returns
+ * accessions as a semicolon-separated string when a project carries multiple
+ * IDs under the same namespace (mirrors the split done by `mapAccessions`).
+ * @param accession - Raw accession value from the Azul response.
+ * @returns Trimmed, non-empty accession IDs.
+ */
+function splitAccessionIds(accession: string): string[] {
+  return accession
+    .split(";")
+    .map((id) => id.trim())
+    .filter(Boolean);
+}
diff --git a/pages/[entityListType]/[...params].tsx b/pages/[entityListType]/[...params].tsx
index d272dc225..bc3714561 100644
--- a/pages/[entityListType]/[...params].tsx
+++ b/pages/[entityListType]/[...params].tsx
@@ -32,6 +32,7 @@ import { JSX } from "react";
 import { EntityGuard } from "../../app/components/Detail/components/EntityGuard/entityGuard";
 import { buildAnvilDatasetJsonLd } from "../../app/utils/schemaOrg/anvilDataset";
 import { buildHcaProjectJsonLd } from "../../app/utils/schemaOrg/hcaProjectDataset";
+import { buildLungmapProjectJsonLd } from "../../app/utils/schemaOrg/lungmapProjectDataset";
 import type { SchemaDataset } from "../../app/utils/schemaOrg/types";
 import { readFile } from "../../app/utils/tsvParser";
 import { JsonLd } from "../../app/views/EntityDetailView/components/JsonLd/jsonLd";
@@ -75,11 +76,13 @@ export interface EntityDetailPageProps extends AzulEntityStaticResponse {
 // Catalog", which shares the "AnVIL" prefix but has a different entity shape.
 const APP_TITLE_ANVIL_CMG = "AnVIL Data Explorer";
 const APP_TITLE_HCA_DCP = "HCA Data Explorer";
+const APP_TITLE_LUNGMAP = "LungMAP Data Explorer";
 
 const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => {
   const { config: siteConfig } = useConfig();
   const isAnVIL = siteConfig.appTitle === APP_TITLE_ANVIL_CMG;
   const isHcaDcp = siteConfig.appTitle === APP_TITLE_HCA_DCP;
+  const isLungMap = siteConfig.appTitle === APP_TITLE_LUNGMAP;
   const { query } = useRouter();
   if (!props.entityListType) return <></>;
   if (props.override) return <EntityGuard override={props.override} />;
@@ -98,6 +101,7 @@ const EntityDetailPage = (props: EntityDetailPageProps): JSX.Element => {
     <>
       {isAnVIL && renderJsonLd(props, "datasets", buildAnvilDatasetJsonLd)}
       {isHcaDcp && renderJsonLd(props, "projects", buildHcaProjectJsonLd)}
+      {isLungMap && renderJsonLd(props, "projects", buildLungmapProjectJsonLd)}
       <EntityDetailView {...props} />
     </>
   );

From 358eb1dba5999ea17d5aabfd067544edf826194b Mon Sep 17 00:00:00 2001
From: Fran McDade <18710366+frano-m@users.noreply.github.com>
Date: Fri, 22 May 2026 16:31:34 +1000
Subject: [PATCH 2/3] fix: cap shared project keywords at max_keywords to match
 anvil builder (#4808)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/utils/schemaOrg/projectDataset.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/utils/schemaOrg/projectDataset.ts b/app/utils/schemaOrg/projectDataset.ts
index 8703a298e..d89f874ae 100644
--- a/app/utils/schemaOrg/projectDataset.ts
+++ b/app/utils/schemaOrg/projectDataset.ts
@@ -14,6 +14,7 @@ import type {
 import type { ProjectsResponse } from "../../apis/azul/hca-dcp/common/responses";
 import { transformAccessionURL } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/accessionMapper";
 import { ACCESSION_CONFIGS_BY_RESPONSE_KEY } from "../../viewModelBuilders/azul/hca-dcp/common/accessionMapper/constants";
+import { MAX_KEYWORDS } from "./constants";
 import type {
   SchemaDataset,
   SchemaOrganization,
@@ -112,7 +113,7 @@ function buildKeywords(data: ProjectsResponse): string[] {
     values.push(...(protocol.libraryConstructionApproach ?? []));
     values.push(...(protocol.instrumentManufacturerModel ?? []));
   }
-  return uniqueNonEmpty(values);
+  return uniqueNonEmpty(values).slice(0, MAX_KEYWORDS);
 }
 
 /**

From af5b43aee2139d6626376ccabd994e62ca756f52 Mon Sep 17 00:00:00 2001
From: Fran McDade <18710366+frano-m@users.noreply.github.com>
Date: Fri, 22 May 2026 16:47:29 +1000
Subject: [PATCH 3/3] chore: rewrite description fallback suffixes for anvil,
 hca, lungmap (#4808)

- AnVIL suffix expanded to spell out NHGRI Analysis Visualization and Informatics Lab-space
- HCA catalog renamed to "Human Cell Atlas Data Explorer"; suffix updated to match
- LungMAP suffix uses "A project in the LungMAP Data Explorer."
- Update buildDescription jsdoc to reflect that the entity name's length
  carries the 50-char minimum in practice (suffix alone no longer self-sufficient)
- Update test expectations accordingly

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 __tests__/utils/schemaOrg/anvilDataset.test.ts   |  4 ++--
 .../utils/schemaOrg/hcaProjectDataset.test.ts    |  6 +++---
 .../schemaOrg/lungmapProjectDataset.test.ts      |  2 +-
 app/utils/schemaOrg/anvilDataset.ts              |  2 +-
 app/utils/schemaOrg/hcaProjectDataset.ts         |  4 ++--
 app/utils/schemaOrg/lungmapProjectDataset.ts     |  2 +-
 app/utils/schemaOrg/utils.ts                     | 16 ++++++++++------
 7 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/__tests__/utils/schemaOrg/anvilDataset.test.ts b/__tests__/utils/schemaOrg/anvilDataset.test.ts
index 160bb12e6..16ad30713 100644
--- a/__tests__/utils/schemaOrg/anvilDataset.test.ts
+++ b/__tests__/utils/schemaOrg/anvilDataset.test.ts
@@ -98,7 +98,7 @@ describe("buildAnvilDatasetJsonLd", () => {
     response.datasets[0].description = "Short.";
     const result = buildAnvilDatasetJsonLd(response, BROWSER_URL);
     expect(result!.description).toBe(
-      "Rare disease dataset — Short. — A genomic dataset in the AnVIL Data Explorer catalog."
+      "Rare disease dataset — Short. — A dataset in the AnVIL Data Explorer for NHGRI's Analysis Visualization and Informatics Lab-space."
     );
     expect(result!.description.length).toBeGreaterThanOrEqual(
       DESCRIPTION_LENGTH.MIN
@@ -110,7 +110,7 @@ describe("buildAnvilDatasetJsonLd", () => {
     response.datasets[0].description = undefined;
     const result = buildAnvilDatasetJsonLd(response, BROWSER_URL);
     expect(result!.description).toBe(
-      "Rare disease dataset — A genomic dataset in the AnVIL Data Explorer catalog."
+      "Rare disease dataset — A dataset in the AnVIL Data Explorer for NHGRI's Analysis Visualization and Informatics Lab-space."
     );
     expect(result!.description.length).toBeGreaterThanOrEqual(
       DESCRIPTION_LENGTH.MIN
diff --git a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts
index 64d5dde9a..eb7b5ad41 100644
--- a/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts
+++ b/__tests__/utils/schemaOrg/hcaProjectDataset.test.ts
@@ -68,7 +68,7 @@ describe("buildHcaProjectJsonLd", () => {
     expect(result!.isAccessibleForFree).toBe(true);
     expect(result!.includedInDataCatalog).toEqual({
       "@type": "DataCatalog",
-      name: "Human Cell Atlas Data Coordination Platform",
+      name: "Human Cell Atlas Data Explorer",
       url: BROWSER_URL,
     });
   });
@@ -95,7 +95,7 @@ describe("buildHcaProjectJsonLd", () => {
     response.projects[0].projectDescription = "Short.";
     const result = buildHcaProjectJsonLd(response, BROWSER_URL);
     expect(result!.description).toBe(
-      "Cells of the body — Short. — Human Cell Atlas Data Coordination Platform project."
+      "Cells of the body — Short. — A project in the Human Cell Atlas Data Explorer."
     );
     expect(result!.description.length).toBeGreaterThanOrEqual(
       DESCRIPTION_LENGTH.MIN
@@ -107,7 +107,7 @@ describe("buildHcaProjectJsonLd", () => {
     response.projects[0].projectDescription = "";
     const result = buildHcaProjectJsonLd(response, BROWSER_URL);
     expect(result!.description).toBe(
-      "Cells of the body — Human Cell Atlas Data Coordination Platform project."
+      "Cells of the body — A project in the Human Cell Atlas Data Explorer."
     );
     expect(result!.description.length).toBeGreaterThanOrEqual(
       DESCRIPTION_LENGTH.MIN
diff --git a/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
index 67688c89b..c52a3eb75 100644
--- a/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
+++ b/__tests__/utils/schemaOrg/lungmapProjectDataset.test.ts
@@ -68,7 +68,7 @@ describe("buildLungmapProjectJsonLd", () => {
     response.projects[0].projectDescription = "Short.";
     const result = buildLungmapProjectJsonLd(response, BROWSER_URL);
     expect(result!.description).toBe(
-      "Lung development atlas — Short. — LungMAP Data Explorer project."
+      "Lung development atlas — Short. — A project in the LungMAP Data Explorer."
     );
   });
 });
diff --git a/app/utils/schemaOrg/anvilDataset.ts b/app/utils/schemaOrg/anvilDataset.ts
index 189eba2c8..7ada125c0 100644
--- a/app/utils/schemaOrg/anvilDataset.ts
+++ b/app/utils/schemaOrg/anvilDataset.ts
@@ -4,7 +4,7 @@ import type { SchemaDataset } from "./types";
 import { buildDescription, uniqueNonEmpty } from "./utils";
 
 const CATALOG_NAME = "AnVIL Data Explorer";
-const DESCRIPTION_FALLBACK_SUFFIX = `A genomic dataset in the ${CATALOG_NAME} catalog.`;
+const DESCRIPTION_FALLBACK_SUFFIX = `A dataset in the ${CATALOG_NAME} for NHGRI's Analysis Visualization and Informatics Lab-space.`;
 
 /**
  * Builds a Schema.org Dataset JSON-LD object for an AnVIL CMG dataset.
diff --git a/app/utils/schemaOrg/hcaProjectDataset.ts b/app/utils/schemaOrg/hcaProjectDataset.ts
index aa2bcd084..5cdf094a8 100644
--- a/app/utils/schemaOrg/hcaProjectDataset.ts
+++ b/app/utils/schemaOrg/hcaProjectDataset.ts
@@ -3,11 +3,11 @@ import type { ProjectCatalogOptions } from "./projectDataset";
 import { buildProjectJsonLd } from "./projectDataset";
 import type { SchemaDataset } from "./types";
 
-const CATALOG_NAME = "Human Cell Atlas Data Coordination Platform";
+const CATALOG_NAME = "Human Cell Atlas Data Explorer";
 
 const OPTIONS: ProjectCatalogOptions = {
   catalogName: CATALOG_NAME,
-  descriptionFallbackSuffix: `${CATALOG_NAME} project.`,
+  descriptionFallbackSuffix: `A project in the ${CATALOG_NAME}.`,
 };
 
 /**
diff --git a/app/utils/schemaOrg/lungmapProjectDataset.ts b/app/utils/schemaOrg/lungmapProjectDataset.ts
index d07cba9d3..6eecf5b40 100644
--- a/app/utils/schemaOrg/lungmapProjectDataset.ts
+++ b/app/utils/schemaOrg/lungmapProjectDataset.ts
@@ -7,7 +7,7 @@ const CATALOG_NAME = "LungMAP Data Explorer";
 
 const OPTIONS: ProjectCatalogOptions = {
   catalogName: CATALOG_NAME,
-  descriptionFallbackSuffix: `${CATALOG_NAME} project.`,
+  descriptionFallbackSuffix: `A project in the ${CATALOG_NAME}.`,
 };
 
 /**
diff --git a/app/utils/schemaOrg/utils.ts b/app/utils/schemaOrg/utils.ts
index d29358c49..8f40756e7 100644
--- a/app/utils/schemaOrg/utils.ts
+++ b/app/utils/schemaOrg/utils.ts
@@ -2,13 +2,17 @@ import { DESCRIPTION_LENGTH } from "./constants";
 
 /**
  * Builds a Schema.org description string from a raw entity description, padding
- * short or empty values with the entity name and a caller-supplied fallback
- * suffix so the result satisfies Google's minimum description-length
- * requirement (50 chars).
+ * short or empty values by prepending the entity name and appending a
+ * caller-supplied fallback suffix. The padded result is `name — suffix` (or
+ * `name — source — suffix` when the source description is non-empty but short),
+ * relying on the entity name's length plus the suffix to clear Google's
+ * 50-character description minimum in practice.
  * @param sourceDescription - Raw description (may contain HTML, may be empty).
- * @param name - Entity name used in the padded fallback.
- * @param fallbackSuffix - Caller-owned suffix (e.g. catalog + entity kind) used
- * to reliably push padded descriptions past the 50-character minimum. The
+ * @param name - Entity name used in the padded fallback. Prepended to the
+ * output; its length is the main contributor to clearing the 50-char minimum
+ * when the source description is short or empty.
+ * @param fallbackSuffix - Caller-owned suffix (e.g. catalog + entity kind)
+ * appended after the name and (when present) the source description. The
  * caller controls phrasing and punctuation; the helper does not add a period.
  * @returns HTML-stripped description, padded when short, truncated when long.
  */