diff --git a/civicpy/civic.py b/civicpy/civic.py index 45bbb95..a9cca70 100644 --- a/civicpy/civic.py +++ b/civicpy/civic.py @@ -468,9 +468,9 @@ def _is_valid_for_gks_json(cls, emit_warnings: bool = False) -> bool: warnings.append(f"{prefix} does not have 'accepted' status. Skipping") record_type = cls.evidence_type if isinstance(cls, Evidence) else cls.assertion_type - if record_type not in ("DIAGNOSTIC", "PREDICTIVE", "PROGNOSTIC"): + if record_type not in ("DIAGNOSTIC", "PREDICTIVE", "PROGNOSTIC", "ONCOGENIC"): warnings.append( - f"{prefix} type is not one of: 'DIAGNOSTIC', 'PREDICTIVE', or 'PROGNOSTIC'. Skipping" + f"{prefix} type is not one of: 'DIAGNOSTIC', 'PREDICTIVE', 'PROGNOSTIC', or 'ONCOGENIC'. Skipping" ) len_mp_variants = len(cls.molecular_profile.variants) diff --git a/civicpy/cli.py b/civicpy/cli.py index 0e19a99..e5a93ee 100644 --- a/civicpy/cli.py +++ b/civicpy/cli.py @@ -4,8 +4,11 @@ from civicpy import civic from civicpy.__env__ import LOCAL_CACHE_PATH from civicpy.exports.civic_gks_record import ( + CivicGksClinSigAssertion, + CivicGksOncogenicAssertion, CivicGksRecordError, - CivicGksAssertion + ClinVarSubmissionType, + create_gks_record_from_assertion, ) from civicpy.exports.civic_gks_writer import CivicGksWriter, GksAssertionError from civicpy.exports.civic_vcf_writer import CivicVcfWriter @@ -71,6 +74,16 @@ def create_vcf(vcf_file_path, include_status): help="The CIViC organization ID that approved the assertion(s) for submission to ClinVar.", type=int, ) +@click.option( + "--submission-type", + type=click.Choice( + [s.value for s in ClinVarSubmissionType], + case_sensitive=True, + ), + help="The ClinVar submission type to generate GKS JSON for.", + default=ClinVarSubmissionType.CLINICAL_IMPACT.value, + show_default=True, +) @click.option( "-o", "--output-json", @@ -83,13 +96,22 @@ def create_vcf(vcf_file_path, include_status): path_type=Path, ), ) -def create_gks_json(organization_id: int, output_json: Path) -> None: +def create_gks_json( 
+ organization_id: int, submission_type: ClinVarSubmissionType, output_json: Path +) -> None: """Create a JSON file for CIViC assertion records approved by a specific organization that are ready for ClinVar submission, represented as GKS objects. For now, we will only support simple molecular profiles and diagnostic, prognostic, - or predictive assertions. + predictive, or oncogenic assertions. + + ClinVar only supports submitting records of the same submission type for a given assertion criteria: + * Clinical Impact -> diagnostic, prognostic, or predictive assertion + * Oncogenicity -> oncogenic assertion + Therefore, you must create separate GKS JSON for each submission type :param organization_id: The CIViC organization ID that approved the assertion(s) for submission to ClinVar + :param submission_type: The ClinVar submission type to generate GKS JSON for. + Defaults to clinical impact. :param output_json: The output file path to write the JSON file to """ try: @@ -98,7 +120,7 @@ def create_gks_json(organization_id: int, output_json: Path) -> None: logging.exception("Error getting organization %i", organization_id) return - records: list[CivicGksAssertion] = [] + records: list[CivicGksClinSigAssertion] | list[CivicGksOncogenicAssertion] = [] errors: list[GksAssertionError] = [] for approval in civic.get_all_approvals_ready_for_clinvar_submission_for_org( @@ -107,7 +129,9 @@ def create_gks_json(organization_id: int, output_json: Path) -> None: assertion = approval.assertion if assertion.is_valid_for_gks_json(emit_warnings=True): try: - gks_record = CivicGksAssertion(assertion, approval=approval) + gks_record = create_gks_record_from_assertion( + assertion, approval=approval, submission_type_filter=submission_type + ) except (CivicGksRecordError, NotImplementedError) as e: errors.append( GksAssertionError(assertion_id=assertion.id, message=str(e)) diff --git a/civicpy/data/test_cache.pkl b/civicpy/data/test_cache.pkl index d699c5e..032bad4 100644 Binary files 
a/civicpy/data/test_cache.pkl and b/civicpy/data/test_cache.pkl differ diff --git a/civicpy/exports/civic_gks_record.py b/civicpy/exports/civic_gks_record.py index b9d0eba..a26fe71 100644 --- a/civicpy/exports/civic_gks_record.py +++ b/civicpy/exports/civic_gks_record.py @@ -1,4 +1,10 @@ -"""Module for representing CIViC assertion record as GKS AAC 2017 Study Statement""" +"""Module for representing CIViC assertion record as GKS representations + +* CIViC Predictive, Prognostic, and Diagnostic Assertions map to Variant Clinical + Significance Statements that follow the AMP/ASCO/CAP 2017 guidelines +* CIViC Oncogenic Assertions map to Variant Oncogenicity Statements that follow the + ClinGen/CGC/VICC Oncogenicity 2022 guidelines +""" from enum import Enum import logging @@ -9,6 +15,7 @@ from ga4gh.core.models import ( Coding, ConceptMapping, + code, iriReference, Extension, MappableConcept, @@ -25,6 +32,7 @@ ) from ga4gh.va_spec.base import ( Agent, + CcvClassification, Contribution, ConditionSet, DiagnosticPredicate, @@ -34,6 +42,7 @@ Method, PrognosticPredicate, Statement, + StrengthCode, System, TherapeuticResponsePredicate, TherapyGroup, @@ -41,6 +50,12 @@ VariantDiagnosticProposition, VariantPrognosticProposition, VariantTherapeuticResponseProposition, + VariantOncogenicityProposition, +) +from ga4gh.va_spec.ccv_2022.derived_evidence import derive_onco_evidence_attributes +from ga4gh.va_spec.ccv_2022 import ( + VariantOncogenicityStatement, + VariantOncogenicityEvidenceLine, ) from ga4gh.vrs.models import Expression, Syntax from pydantic import BaseModel @@ -90,6 +105,30 @@ class CivicEvidenceAssertionType(str, Enum): PREDICTIVE = "PREDICTIVE" PROGNOSTIC = "PROGNOSTIC" DIAGNOSTIC = "DIAGNOSTIC" + ONCOGENIC = "ONCOGENIC" + + +CLINICAL_SIGNIFICANCE_ASSERTION_TYPES = [ + CivicEvidenceAssertionType.PREDICTIVE.value, + CivicEvidenceAssertionType.PROGNOSTIC.value, + CivicEvidenceAssertionType.DIAGNOSTIC.value, +] +ONCOGENIC_ASSERTION_TYPES = 
[CivicEvidenceAssertionType.ONCOGENIC.value] + + +class ClinVarSubmissionType(str, Enum): + """Define supported submission types to ClinVar""" + + CLINICAL_IMPACT = "clinical_impact" + ONCOGENICITY = "oncogenicity" + + +ASSERTION_TYPES_BY_CLINVAR_SUBMISSION_TYPE = MappingProxyType( + { + ClinVarSubmissionType.CLINICAL_IMPACT: CLINICAL_SIGNIFICANCE_ASSERTION_TYPES, + ClinVarSubmissionType.ONCOGENICITY: ONCOGENIC_ASSERTION_TYPES, + } +) class CivicEvidenceLevel(str, Enum): @@ -133,6 +172,11 @@ class CivicEvidenceName(str, Enum): "BETTER_OUTCOME": PrognosticPredicate.BETTER_OUTCOME, "POSITIVE": DiagnosticPredicate.INCLUSIVE, "NEGATIVE": DiagnosticPredicate.EXCLUSIVE, + "BENIGN": "isOncogenicFor", + "LIKELY_BENIGN": "isOncogenicFor", + "LIKELY_ONCOGENIC": "isOncogenicFor", + "ONCOGENIC": "isOncogenicFor", + "UNCERTAIN_SIGNIFICANCE": "isOncogenicFor", } ) @@ -623,6 +667,8 @@ def __init__( class _CivicGksEvidenceAssertionMixin: + """Mixin for CIViC Evidence and Assertions""" + @staticmethod def get_allele_origin_qualifier(record: Evidence | Assertion) -> MappableConcept: """Get GKS allele origin qualifier @@ -641,7 +687,11 @@ def get_allele_origin_qualifier(record: Evidence | Assertion) -> MappableConcept def get_predicate( record: Evidence | Assertion, ) -> ( - PrognosticPredicate | DiagnosticPredicate | TherapeuticResponsePredicate | None + PrognosticPredicate + | DiagnosticPredicate + | TherapeuticResponsePredicate + | str + | None ): """Get GKS predicate @@ -717,10 +767,14 @@ def _get_proposition_params( "alleleOriginQualifier": self.get_allele_origin_qualifier(record), "predicate": self.get_predicate(record) if not is_clinical_significance_prop - else VariantClinicalSignificanceProposition.model_fields["predicate"].default, + else VariantClinicalSignificanceProposition.model_fields[ + "predicate" + ].default, } - if ( + if record_type == CivicEvidenceAssertionType.ONCOGENIC: + condition_key = "objectTumorType" + elif ( is_clinical_significance_prop or record_type != 
CivicEvidenceAssertionType.PREDICTIVE ): @@ -877,10 +931,58 @@ def __init__(self, evidence_item: Evidence) -> None: ) -class CivicGksAssertion( - VariantClinicalSignificanceStatement, _CivicGksEvidenceAssertionMixin +class _CivicGksAssertionMixin: + """Mixin for CIViC Assertions""" + + @staticmethod + def get_contributions(approval: Approval) -> list[Contribution]: + """Get contributions for an approval + + :param approval: Approval for assertion + :return: List of contributions, with one item containing when the approval was + last reviewed by an organization. + Will include an extension, `is_approved_vcep`. + """ + organization: Organization = approval.organization + return [ + Contribution( + activityType=f"{approval.type}.last_reviewed", + date=approval.last_reviewed.split("T", 1)[0], + contributor=Agent( + id=f"civic.{organization.type}:{organization.id}", + name=organization.name, + description=organization.description, + extensions=[ + Extension( + name="is_approved_vcep", value=organization.is_approved_vcep + ) + ], + ), + ) + ] + + @staticmethod + def get_reported_in(assertion: Assertion) -> list[iriReference]: + """Get reported in information for an assertion + + :param assertion: CIViC assertion record + :return: List of CIViC links to records which the assertion is reported in + """ + reported_in: list[iriReference] = [ + iriReference(f"{LINKS_URL}/assertion/{assertion.id}") + ] + for evidence_item in assertion.evidence_items or []: + reported_in.append(iriReference(f"{LINKS_URL}/evidence/{evidence_item.id}")) + return reported_in + + +class CivicGksClinSigAssertion( + VariantClinicalSignificanceStatement, + _CivicGksAssertionMixin, + _CivicGksEvidenceAssertionMixin, ): - """Class for CIViC assertion record represented as GKS + """Class for CIViC predictive, prognostic, or diagnostic assertion record + represented as GKS :param assertion: CIViC assertion record :raises CivicGksRecordError: If CIViC assertion is not able to be represented as @@ -892,13
+994,19 @@ def __init__( assertion: Assertion, approval: Approval | None = None, ) -> None: - """Initialize _CivicGksAssertionRecord class + """Initialize CivicGksClinSigAssertion class :param assertion: CIViC assertion record :param approval: CIViC approval for the assertion, defaults to None :raises CivicGksRecordError: If CIViC assertion is not able to be represented as GKS object """ + if assertion.assertion_type not in CLINICAL_SIGNIFICANCE_ASSERTION_TYPES: + err_msg = ( + f"Assertion type must be one of {CLINICAL_SIGNIFICANCE_ASSERTION_TYPES}" + ) + raise CivicGksRecordError(err_msg) + if not assertion.is_valid_for_gks_json(emit_warnings=True): err_msg = "Assertion is not valid for GKS." raise CivicGksRecordError(err_msg) @@ -924,35 +1032,10 @@ def __init__( classification=classification, strength=strength, hasEvidenceLines=self.get_evidence_lines(assertion, level), - reportedIn=[iriReference(f"{LINKS_URL}/assertion/{assertion.id}")], + reportedIn=self.get_reported_in(assertion), extensions=extensions or None, ) - @staticmethod - def get_contributions(approval: Approval) -> list[Contribution]: - """Get contributions for an approval - - :param approval: Approval for assertion - :return: List of contributions containing when the approval was last reviewed - """ - organization: Organization = approval.organization - return [ - Contribution( - activityType=f"{approval.type}.last_reviewed", - date=approval.last_reviewed.split("T", 1)[0], - contributor=Agent( - id=f"civic.{organization.type}:{organization.id}", - name=organization.name, - description=organization.description, - extensions=[ - Extension( - name="is_approved_vcep", value=organization.is_approved_vcep - ) - ], - ), - ) - ] - def get_classification_strength_level( self, amp_level: str, @@ -1013,7 +1096,6 @@ def get_evidence_lines( ) evidence_items: list[CivicGksEvidence] = [] - eid_links: list[str] = [] for evidence_item in assertion.evidence_items: try: 
evidence_items.append(CivicGksEvidence(evidence_item)) @@ -1029,9 +1111,16 @@ def get_evidence_lines( evidence_item.name, str(e), ) - finally: - # Retain all EID references - eid_links.append(f"{LINKS_URL}/evidence/{evidence_item.id}") + + if assertion.assertion_type == CivicEvidenceAssertionType.PREDICTIVE: + evidence_line_cls = TherapeuticEvidenceLine + elif assertion.assertion_type == CivicEvidenceAssertionType.DIAGNOSTIC: + evidence_line_cls = DiagnosticEvidenceLine + elif assertion.assertion_type == CivicEvidenceAssertionType.PROGNOSTIC: + evidence_line_cls = PrognosticEvidenceLine + else: + msg = f"Evidence line type for assertion type is not supported: {assertion.assertion_type}" + raise NotImplementedError(msg) if assertion.assertion_type == CivicEvidenceAssertionType.PREDICTIVE: evidence_line_cls = TherapeuticEvidenceLine @@ -1051,7 +1140,6 @@ def get_evidence_lines( strengthOfEvidenceProvided=MappableConcept( primaryCoding=(Coding(code=level, system=System.AMP_ASCO_CAP)) ), - extensions=[Extension(name="citations", value=eid_links)] ).root ] @@ -1067,3 +1155,179 @@ def get_proposition( assertion, assertion.assertion_type, is_clinical_significance_prop=True ) return VariantClinicalSignificanceProposition(**params) + + +class CivicGksOncogenicAssertion( + VariantOncogenicityStatement, + _CivicGksAssertionMixin, + _CivicGksEvidenceAssertionMixin, +): + """Class for CIViC oncogenic assertion record represented as GKS""" + + def __init__(self, assertion: Assertion, approval: Approval | None = None) -> None: + """Initialize CivicGksOncogenicAssertion class + + :param assertion: CIViC assertion record + :param approval: CIViC approval for the assertion, defaults to None + :raises CivicGksRecordError: If CIViC assertion is not able to be represented as + GKS object + """ + if assertion.assertion_type not in ONCOGENIC_ASSERTION_TYPES: + err_msg = f"Assertion type must be one of {ONCOGENIC_ASSERTION_TYPES}" + raise CivicGksRecordError(err_msg) + + if not 
assertion.is_valid_for_gks_json(emit_warnings=True): + err_msg = "Assertion is not valid for GKS." + raise CivicGksRecordError(err_msg) + + contributions = self.get_contributions(approval) if approval else None + proposition = self.get_proposition(assertion) + classification, strength = self.get_classification_strength( + assertion.significance + ) + + super().__init__( + id=f"civic.aid:{assertion.id}", + contributions=contributions, + description=assertion.description, + specifiedBy=self._get_ccv_method("guideline"), + proposition=proposition, + direction=self.get_direction(assertion.assertion_direction), + classification=classification, + strength=strength, + hasEvidenceLines=self.get_evidence_lines(assertion, proposition), + reportedIn=self.get_reported_in(assertion), + ) + + def get_classification_strength( + self, significance + ) -> tuple[MappableConcept, MappableConcept | None]: + """Get classification and strength + + :param significance: Assertion's significance + :return: Classification and strength, if found + """ + _strength = None + + classification = MappableConcept( + primaryCoding=Coding( + code=code(CcvClassification[significance]), system=System.CCV + ) + ) + + if significance in {"LIKELY_BENIGN", "LIKELY_ONCOGENIC"}: + _strength = StrengthCode.LIKELY + elif significance in {"BENIGN", "ONCOGENIC"}: + _strength = StrengthCode.DEFINITIVE + + if _strength: + strength = MappableConcept( + primaryCoding=Coding(code=code(_strength.value), system=System.CCV) + ) + else: + strength = None + + return classification, strength + + def get_evidence_lines( + self, assertion: Assertion, proposition: VariantOncogenicityProposition + ) -> list[VariantOncogenicityEvidenceLine]: + """Get evidence lines for a CIViC assertion + + :param assertion: CIViC assertion + :param proposition: Proposition for CIViC assertion + :return: List of CIViC evidence lines + """ + direction = ( + Direction.SUPPORTS + if assertion.assertion_direction == "SUPPORTS" + else 
Direction.DISPUTES + ) + + evidence_lines = [] + for clingen_code in assertion.clingen_codes or []: + evidence_attrs = derive_onco_evidence_attributes( + VariantOncogenicityEvidenceLine.Criterion(clingen_code.code) + ) + evidence_lines.append( + VariantOncogenicityEvidenceLine( + directionOfEvidenceProvided=direction, + **evidence_attrs.model_dump(), + specifiedBy=self._get_ccv_method(clingen_code.code), + ) + ) + + return evidence_lines + + def get_proposition(self, assertion: Assertion) -> VariantOncogenicityProposition: + """Get GKS proposition + + :param assertion: CIViC assertion record + :return: GKS proposition + """ + params = self._get_proposition_params( + assertion, assertion.assertion_type, is_clinical_significance_prop=False + ) + return VariantOncogenicityProposition(**params) + + @staticmethod + def _get_ccv_method(method_type: str) -> Method: + """Get ClinGen/CGC/VICC Guidelines method + + :param method_type: Value to use for `methodType` + :return: ClinGen/CGC/VICC Guidelines represented as GKS Method + """ + + return Method( + name="ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + reportedIn=Document( + id="pmid:35101336", + name="Horak et al., 2022, Genet Med.", + title="Standards for the classification of pathogenicity of somatic variants in cancer (oncogenicity): Joint recommendations of Clinical Genome Resource (ClinGen), Cancer Genomics Consortium (CGC), and Variant Interpretation for Cancer Consortium (VICC)", + doi="10.1016/j.gim.2022.01.001", + pmid="35101336", + urls=[ + "https://doi.org/10.1016/j.gim.2022.01.001", + "https://pubmed.ncbi.nlm.nih.gov/35101336/", + ], + ), + methodType=method_type, + ) + + +def create_gks_record_from_assertion( + assertion: Assertion, + approval: Approval | None = None, + submission_type_filter: ClinVarSubmissionType | None = None, +) -> CivicGksClinSigAssertion | CivicGksOncogenicAssertion: + """Create GKS Record from CIViC Assertion + + :param assertion: CIViC assertion record + :param approval: 
CIViC approval for the assertion, defaults to None + :param submission_type_filter: Optional ClinVar submission type used to + restrict which assertion types may be translated + :raises NotImplementedError: If GKS Record translation is not yet supported. + Currently, only the following assertion types are supported: DIAGNOSTIC, + PREDICTIVE, PROGNOSTIC, and ONCOGENIC. + Or if the assertion type is excluded by the provided ClinVar submission type + filter. + :return: GKS Assertion Record object + """ + assertion_type = assertion.assertion_type + + if submission_type_filter: + allowed_assertion_types = ASSERTION_TYPES_BY_CLINVAR_SUBMISSION_TYPE[ + submission_type_filter + ] + if assertion_type not in allowed_assertion_types: + err_msg = f"Assertion type {assertion_type} is not supported for ClinVar submission type {submission_type_filter.value}" + raise NotImplementedError(err_msg) + + if assertion_type in CLINICAL_SIGNIFICANCE_ASSERTION_TYPES: + return CivicGksClinSigAssertion(assertion, approval=approval) + + if assertion_type in ONCOGENIC_ASSERTION_TYPES: + return CivicGksOncogenicAssertion(assertion, approval=approval) + + err_msg = f"Assertion type {assertion_type} is not currently supported" + raise NotImplementedError(err_msg) diff --git a/civicpy/exports/civic_gks_writer.py b/civicpy/exports/civic_gks_writer.py index a96a2ef..da37e97 100644 --- a/civicpy/exports/civic_gks_writer.py +++ b/civicpy/exports/civic_gks_writer.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field -from civicpy.exports.civic_gks_record import CivicGksAssertion +from civicpy.exports.civic_gks_record import CivicGksClinSigAssertion, CivicGksOncogenicAssertion def get_pkg_version(name: str) -> str: @@ -43,7 +43,7 @@ class GksAssertionError(BaseModel): class GksOutput(BaseModel): """Define model for representing GKS JSON output""" - gks_records: list[CivicGksAssertion] + gks_records: list[CivicGksClinSigAssertion | CivicGksOncogenicAssertion] metadata: GksOutputMetadata 
failed_assertion_ids: list[int] = [] errors: list[GksAssertionError] = [] @@ -60,7 +60,7 @@ class CivicGksWriter: def __init__( self, filepath: Path, - gks_records: list[CivicGksAssertion], + gks_records: list[CivicGksClinSigAssertion | CivicGksOncogenicAssertion], errors: list[GksAssertionError] | None = None, ): """Initialize CivicGksWriter class diff --git a/civicpy/tests/test_civic.py b/civicpy/tests/test_civic.py index 22d2f11..51793e5 100644 --- a/civicpy/tests/test_civic.py +++ b/civicpy/tests/test_civic.py @@ -1203,12 +1203,15 @@ def test_is_valid_for_gks_warnings_assertion(caplog): assert not not_accepted.is_valid_for_gks_json(emit_warnings=True) assert "Assertion 117 does not have 'accepted' status. Skipping" in caplog.text - oncogenic_fusion = civic.get_assertion_by_id(101) - assert not oncogenic_fusion.is_valid_for_gks_json(emit_warnings=True) + predisposing_assertion = civic.get_assertion_by_id(17) + assert not predisposing_assertion.is_valid_for_gks_json(emit_warnings=True) assert ( - "Assertion 101 type is not one of: 'DIAGNOSTIC', 'PREDICTIVE', or 'PROGNOSTIC'. Skipping" + "Assertion 17 type is not one of: 'DIAGNOSTIC', 'PREDICTIVE', 'PROGNOSTIC', or 'ONCOGENIC'. Skipping" in caplog.text ) + + oncogenic_fusion = civic.get_assertion_by_id(101) + assert not oncogenic_fusion.is_valid_for_gks_json(emit_warnings=True) assert "Assertion 101 variant is not a ``GeneVariant``. Skipping" in caplog.text complex_mp = civic.get_assertion_by_id(88) @@ -1220,10 +1223,6 @@ def test_is_valid_for_gks_warnings_evidence(caplog): """Test that is_valid_for_gks_json works correctly for evidence items""" not_accepted_oncogenic_fusion = civic.get_evidence_by_id(6936) assert not not_accepted_oncogenic_fusion.is_valid_for_gks_json(emit_warnings=True) - assert ( - "Evidence 6936 type is not one of: 'DIAGNOSTIC', 'PREDICTIVE', or 'PROGNOSTIC'. Skipping" - in caplog.text - ) assert "Evidence 6936 variant is not a ``GeneVariant``. 
Skipping" in caplog.text assert "Evidence 6936 does not have 'accepted' status. Skipping" in caplog.text diff --git a/civicpy/tests/test_exports.py b/civicpy/tests/test_exports.py index 2739d52..b7273a6 100644 --- a/civicpy/tests/test_exports.py +++ b/civicpy/tests/test_exports.py @@ -1,4 +1,5 @@ from copy import deepcopy +import re from unittest.mock import PropertyMock, patch import pytest from deepdiff import DeepDiff @@ -6,6 +7,7 @@ from ga4gh.va_spec.aac_2017 import ( VariantClinicalSignificanceStatement, ) +from ga4gh.va_spec.ccv_2022 import VariantOncogenicityStatement from civicpy import civic @@ -13,9 +15,12 @@ from civicpy.exports.civic_gks_record import ( CivicGksEvidence, CivicGksMolecularProfile, + CivicGksOncogenicAssertion, CivicGksRecordError, - CivicGksAssertion, + CivicGksClinSigAssertion, CivicGksTherapyGroup, + ClinVarSubmissionType, + create_gks_record_from_assertion, ) @@ -108,6 +113,18 @@ def aid117(): return civic.get_assertion_by_id(117) +@pytest.fixture(scope="module") +def aid202(): + """Create test fixture for oncogenic assertion""" + return civic.get_assertion_by_id(202) + + +@pytest.fixture(scope="module") +def approval4(): + """Create test fixture for active approval""" + return civic.get_approval_by_id(4) + + @pytest.fixture(scope="module") def gks_contributions(): return [ @@ -302,7 +319,7 @@ def gks_gid19(): "NISBD2", "PIG61", "mENA", - "NNCIS" + "NNCIS", ], }, ], @@ -473,23 +490,17 @@ def gks_aid6( "system": "AMP/ASCO/CAP Guidelines, 2017", }, }, - "extensions": [ - { - "name": "citations", - "value": [ - "https://civicdb.org/links/evidence/2997", - "https://civicdb.org/links/evidence/879", - "https://civicdb.org/links/evidence/982", - "https://civicdb.org/links/evidence/883", - "https://civicdb.org/links/evidence/968", - "https://civicdb.org/links/evidence/2629" - ] - } - - ] } ], - "reportedIn": ["https://civicdb.org/links/assertion/6"], + "reportedIn": [ + "https://civicdb.org/links/assertion/6", + 
"https://civicdb.org/links/evidence/2997", + "https://civicdb.org/links/evidence/879", + "https://civicdb.org/links/evidence/982", + "https://civicdb.org/links/evidence/883", + "https://civicdb.org/links/evidence/968", + "https://civicdb.org/links/evidence/2629", + ], } return VariantClinicalSignificanceStatement(**params) @@ -603,6 +614,323 @@ def gks_aid115_object_condition(): } +def _ccv_method(method_type: str) -> dict: + """Get CCV Method""" + return { + "name": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + "reportedIn": { + "id": "pmid:35101336", + "name": "Horak et al., 2022, Genet Med.", + "title": "Standards for the classification of pathogenicity of somatic variants in cancer (oncogenicity): Joint recommendations of Clinical Genome Resource (ClinGen), Cancer Genomics Consortium (CGC), and Variant Interpretation for Cancer Consortium (VICC)", + "doi": "10.1016/j.gim.2022.01.001", + "pmid": "35101336", + "urls": [ + "https://doi.org/10.1016/j.gim.2022.01.001", + "https://pubmed.ncbi.nlm.nih.gov/35101336/", + ], + "type": "Document", + }, + "methodType": method_type, + "type": "Method", + } + + +@pytest.fixture(scope="module") +def gks_gid42(): + """Create test fixture for CIViC GID42 GKS representation.""" + return { + "id": "civic.gid:42", + "conceptType": "Gene", + "name": "RET", + "mappings": [ + { + "coding": { + "id": "ncbigene:5979", + "code": "5979", + "system": "https://www.ncbi.nlm.nih.gov/gene/", + }, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "description", + "value": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. 
The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistance. Highly selective and well-tolerated RET inhibitors, selpercatinib (LOXO-292) and pralsetinib (BLU-667), have been FDA approved recently for the treatment of RET fusion-positive non-small-cell lung cancer, RET fusion-positive thyroid cancer and RET-mutant medullary thyroid cancer.", + }, + { + "name": "aliases", + "value": [ + "CDHF12", + "CDHR16", + "HSCR1", + "MEN2A", + "MEN2B", + "MTC1", + "PTC", + "RET", + "RET-ELE1", + ], + }, + ], + } + + +@pytest.fixture(scope="module") +def gks_aid202_proposition(gks_gid42): + """Create test fixture for CIViC AID202 proposition""" + return { + "type": "VariantOncogenicityProposition", + "geneContextQualifier": gks_gid42, + "objectTumorType": { + "id": "civic.did:15", + "conceptType": "Disease", + "name": "Medullary Thyroid Carcinoma", + "mappings": [ + { + "coding": { + "code": "DOID:3973", + "system": "https://disease-ontology.org/?id=", + }, + "relation": "exactMatch", + } + ], + }, + "alleleOriginQualifier": { + "name": "somatic", + "extensions": [{"name": "civic_variant_origin", "value": "SOMATIC"}], + }, + "predicate": "isOncogenicFor", + "subjectVariant": { + "id": "civic.mpid:113", + "type": "CategoricalVariant", + "description": "RET M918T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib.
It has also been suggested that RET M918T leads to more aggressive MTC with a poorer prognosis.", + "name": "RET M918T", + "aliases": ["MET918THR"], + "mappings": [ + { + "coding": { + "code": "rs74799832", + "system": "https://www.ncbi.nlm.nih.gov/snp/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "CA009082", + "system": "https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_canonicalid?canonicalid=", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "code": "13919", + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", + }, + "relation": "relatedMatch", + }, + { + "coding": { + "id": "civic.mpid:113", + "code": "113", + "system": "https://civicdb.org/links/molecular_profile/", + }, + "relation": "exactMatch", + }, + { + "coding": { + "code": "113", + "id": "civic.vid:113", + "name": "M918T", + "system": "https://civicdb.org/links/variant/", + "extensions": [ + {"name": "subtype", "value": "gene_variant"}, + { + "name": "variant_types", + "value": [ + { + "coding": { + "id": "civic.variant_type:47", + "code": "SO:0001583", + "name": "Missense Variant", + "system": "http://www.sequenceontology.org/browser/current_svn/term/", + }, + "relation": "exactMatch", + } + ], + }, + ], + }, + "relation": "exactMatch", + }, + ], + "extensions": [ + { + "name": "CIViC representative coordinate", + "value": { + "chromosome": "10", + "start": 43617416, + "stop": 43617416, + "reference_bases": "T", + "variant_bases": "C", + "representative_transcript": "ENST00000355710.3", + "ensembl_version": 75, + "reference_build": "GRCh37", + "type": "coordinates", + }, + }, + { + "name": "CIViC Molecular Profile Score", + "value": 139.0, + }, + { + "name": "expressions", + "value": [ + {"syntax": "hgvs.c", "value": "ENST00000355710.3:c.2753T>C"}, + {"syntax": "hgvs.c", "value": "NM_020975.4:c.2753T>C"}, + {"syntax": "hgvs.g", "value": "NC_000010.10:g.43617416T>C"}, + {"syntax": "hgvs.g", "value":
"NC_000010.11:g.43121968T>C"}, + {"syntax": "hgvs.p", "value": "ENSP00000347942.3:p.Met918Thr"}, + {"syntax": "hgvs.p", "value": "NP_065681.1:p.Met918Thr"}, + { + "syntax": "hgvs.c", + "value": "ENST00000355710.8:c.2753T>C", + "extensions": [{"name": "is_mane_select", "value": True}], + }, + ], + }, + ], + }, + } + + +@pytest.fixture(scope="module") +def gks_aid202(gks_aid202_proposition): + """Create CIViC AID202 GKS representation.""" + params = { + "id": "civic.aid:202", + "type": "Statement", + "description": "Published sequencing studies have shown that RET mutations are very common in medullary thryoid carcinoma (MTC) and M918T is the most common specific variant, especially in the MEN2B clinical subtype of familial disease (civic.EID:78) but also in sporadic cases(civic.EID:12800). M918T mutations may predict worse outcomes (civic.EID:74). Biochemical and functional characterization demonstrates that the M918T mutation leads to functional activation of RET relative to wild-type through multiple complementary mechanisms, including increased ATP affinity (>10-fold) and complex stability, reduced conformational rigidity, and the promotion of ligand-independent dimerization and autophosphorylation (civic.EID:12805). Exogenous expression has been shown to induce transformation of Ba/F3 cells (civic.EID:11723), and drive colony formation in NIH3T3 cells (civic.EID:12709, OS2). RET M918T occurs in the region of the tyrosine kinase domain which is associated with multiple endocrine neoplasia type 2 B (OM1). RET M918T is predicted to be deleterious (CHASMplus score 0.314 > VECS gene-specific cutoff of 0.22, OP1). Eleven instances of the variant occur in cancerhotspots.org (V2): 6 Thyroid, 4 Adrenal Gland, 1 Breast (OP3). The variant is absent in gnomAD database (v4.1.0, OP4).
Together these criteria indicate that M918T is likely oncogenic, with a score of 9.", + "proposition": gks_aid202_proposition, + "strength": { + "primaryCoding": { + "code": "likely", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "classification": { + "primaryCoding": { + "code": "likely oncogenic", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "reportedIn": [ + "https://civicdb.org/links/assertion/202", + "https://civicdb.org/links/evidence/74", + "https://civicdb.org/links/evidence/12800", + "https://civicdb.org/links/evidence/78", + "https://civicdb.org/links/evidence/12711", + "https://civicdb.org/links/evidence/12805", + "https://civicdb.org/links/evidence/11723", + "https://civicdb.org/links/evidence/12709", + ], + "direction": "supports", + "specifiedBy": _ccv_method("guideline"), + "hasEvidenceLines": [ + { + "type": "EvidenceLine", + "directionOfEvidenceProvided": "supports", + "strengthOfEvidenceProvided": { + "primaryCoding": { + "code": "moderate", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "evidenceOutcome": { + "primaryCoding": { + "code": "OM1", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "scoreOfEvidenceProvided": 2, + "specifiedBy": _ccv_method("OM1"), + }, + { + "type": "EvidenceLine", + "directionOfEvidenceProvided": "supports", + "strengthOfEvidenceProvided": { + "primaryCoding": { + "code": "strong", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "evidenceOutcome": { + "primaryCoding": { + "code": "OS2", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "scoreOfEvidenceProvided": 4, + "specifiedBy": _ccv_method("OS2"), + }, + { + "type": "EvidenceLine", + "directionOfEvidenceProvided": "supports", + "strengthOfEvidenceProvided": { + "primaryCoding": { + "code": "supporting", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + 
"evidenceOutcome": { + "primaryCoding": { + "code": "OP4", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "scoreOfEvidenceProvided": 1, + "specifiedBy": _ccv_method("OP4"), + }, + { + "type": "EvidenceLine", + "directionOfEvidenceProvided": "supports", + "strengthOfEvidenceProvided": { + "primaryCoding": { + "code": "supporting", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "evidenceOutcome": { + "primaryCoding": { + "code": "OP1", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "scoreOfEvidenceProvided": 1, + "specifiedBy": _ccv_method("OP1"), + }, + { + "type": "EvidenceLine", + "directionOfEvidenceProvided": "supports", + "strengthOfEvidenceProvided": { + "primaryCoding": { + "code": "supporting", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "evidenceOutcome": { + "primaryCoding": { + "code": "OP3", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, + "scoreOfEvidenceProvided": 1, + "specifiedBy": _ccv_method("OP3"), + }, + ], + } + return VariantOncogenicityStatement(**params) + + class TestCivicVcfRecord(object): def test_protein_altering(self, caplog, v600e): record = CivicVcfRecord(v600e) @@ -751,12 +1079,12 @@ def test_invalid(self, eid9285): CivicGksEvidence(eid9285) -class TestCivicGksAssertion(object): - """Test that CivicGksAssertion works as expected""" +class TestCivicGksClinSigAssertion(object): + """Test that CivicGksClinSigAssertion works as expected""" def test_valid_single_therapy(self, aid6, gks_aid6): """Test that single therapy works as expected""" - record = CivicGksAssertion(aid6) + record = CivicGksClinSigAssertion(aid6) assert isinstance(record, VariantClinicalSignificanceStatement) assert len(record.hasEvidenceLines) == 1 @@ -778,7 +1106,7 @@ def test_valid_single_therapy(self, aid6, gks_aid6): def test_valid_combination_therapy(self, aid7): """Test that combination therapy works as expected""" 
- record = CivicGksAssertion(aid7) + record = CivicGksClinSigAssertion(aid7) assert isinstance(record, VariantClinicalSignificanceStatement) assert len(record.hasEvidenceLines) == 1 assert len(record.hasEvidenceLines[0].hasEvidenceItems) == 4 @@ -824,8 +1152,7 @@ def test_valid_substitution_therapy( test_is_valid_for_gks_json.return_value = True test_evidence_items.return_value = [] test_hgvs_expressions.return_value = None - test_mane_select_transcript.return_value = None - record = CivicGksAssertion(aid19) + record = CivicGksClinSigAssertion(aid19) assert isinstance(record, VariantClinicalSignificanceStatement) assert len(record.hasEvidenceLines) == 1 therapy = record.hasEvidenceLines[0].targetProposition.objectTherapeutic.root @@ -837,7 +1164,7 @@ def test_valid_substitution_therapy( def test_valid_prognostic(self, aid20): """Test that valid prognostic assertion works as expected""" - record = CivicGksAssertion(aid20) + record = CivicGksClinSigAssertion(aid20) assert isinstance(record, VariantClinicalSignificanceStatement) assert len(record.hasEvidenceLines) == 1 assert len(record.hasEvidenceLines[0].hasEvidenceItems) == 6 @@ -850,18 +1177,17 @@ def test_valid_prognostic(self, aid20): @patch.object(civic.Assertion, "evidence_items", new_callable=PropertyMock) @patch.object(civic.Evidence, "is_valid_for_gks_json") - def test_citations(self, test_is_valid_for_gks_json,test_evidence_items, aid20): + def test_citations(self, test_is_valid_for_gks_json, test_evidence_items, aid20): """Test that citations extension is working correctly for EIDs that are not valid for GKS""" test_evidence_items.return_value = [civic.get_evidence_by_id(11881)] test_is_valid_for_gks_json.return_value = False - record = CivicGksAssertion(aid20) + record = CivicGksClinSigAssertion(aid20) assert len(record.hasEvidenceLines) == 1 assert record.hasEvidenceLines[0].hasEvidenceItems is None - assert len(record.hasEvidenceLines[0].extensions) == 1 - assert 
record.hasEvidenceLines[0].extensions[0].model_dump(exclude_none=True) == { - "name": "citations", - "value": ["https://civicdb.org/links/evidence/11881"] + assert {r.model_dump(exclude_none=True) for r in record.reportedIn or []} == { + "https://civicdb.org/links/evidence/11881", + "https://civicdb.org/links/assertion/20", } @@ -877,7 +1203,7 @@ def test_valid( gks_aid115_object_condition, ): """Test that valid diagnostic assertion works as expected""" - record = CivicGksAssertion(aid9) + record = CivicGksClinSigAssertion(aid9) assert isinstance(record, VariantClinicalSignificanceStatement) assert len(record.hasEvidenceLines) == 1 assert len(record.hasEvidenceLines[0].hasEvidenceItems) == 2 @@ -895,7 +1221,7 @@ def test_valid( ) # Single phenotype (complex condition set) - record = CivicGksAssertion(aid93) + record = CivicGksClinSigAssertion(aid93) assert isinstance(record, VariantClinicalSignificanceStatement) record_object_condition = record.proposition.objectCondition assert isinstance(record_object_condition, Condition) @@ -908,7 +1234,7 @@ def test_valid( assert diff == {} # Phenotypes (complex condition set) - record = CivicGksAssertion(aid115) + record = CivicGksClinSigAssertion(aid115) assert isinstance(record, VariantClinicalSignificanceStatement) record_object_condition = record.proposition.objectCondition assert isinstance(record_object_condition, Condition) @@ -920,9 +1246,173 @@ def test_valid( ) assert diff == {} + def test_invalid(self, aid117): + """Test that unsupported assertion types raise exceptions""" + + with pytest.raises( + CivicGksRecordError, + match=re.escape( + "Assertion type must be one of ['PREDICTIVE', 'PROGNOSTIC', 'DIAGNOSTIC']" + ), + ): + CivicGksClinSigAssertion(aid117) + + +class TestCivicGksOncogenicAssertion(object): + """Test that CivicGksOncogenicAssertion works as expected""" + + def test_valid(self, aid202, gks_aid202): + """Test that valid oncogenic assertions work as expected""" + + def evidence_key(item: dict) -> 
str: + return item["evidenceOutcome"]["primaryCoding"]["code"] + + record = CivicGksOncogenicAssertion(aid202, approval=None) + assert isinstance(record, VariantOncogenicityStatement) + + actual = record.model_dump(exclude_none=True) + expected = gks_aid202.model_dump(exclude_none=True) + + assert set(actual.keys()) == set(expected.keys()) + + # Split out due to large record + for key in expected: + if key == "hasEvidenceLines": + actual_evidence = actual[key] + expected_evidence = expected[key] + + assert len(actual_evidence) == len(expected_evidence), ( + f"Mismatch in hasEvidenceLines length: " + f"actual={len(actual_evidence)}, expected={len(expected_evidence)}" + ) + + actual_by_code = {evidence_key(item): item for item in actual_evidence} + expected_by_code = { + evidence_key(item): item for item in expected_evidence + } + + assert set(actual_by_code) == set(expected_by_code), ( + "Mismatch in hasEvidence evidenceOutcome.primaryCoding.code values" + ) + + for code in expected_by_code: + diff = DeepDiff( + actual_by_code[code], + expected_by_code[code], + ignore_order=True, + ) + + assert diff == {}, ( + "Mismatch in hasEvidence item with " + f"evidenceOutcome.primaryCoding.code={code}" + ) + + continue + + diff = DeepDiff( + actual[key], + expected[key], + ignore_order=True, + ) + + assert diff == {}, f"Mismatch in key: {key}" + + def test_invalid(self, aid6): + """Test that unsupported assertion types raise exceptions""" + + with pytest.raises( + CivicGksRecordError, + match=re.escape("Assertion type must be one of ['ONCOGENIC']"), + ): + CivicGksOncogenicAssertion(aid6) + + +class TestCivicGksRecord(object): + """Test that GKS Record helper functions work correctly""" + + def test_invalid(self): + """Test that unsupported assertion types raise NotImplementedError""" + + with pytest.raises( + NotImplementedError, + match=r"Assertion type PREDISPOSING is not currently supported", + ): + create_gks_record_from_assertion(civic.get_assertion_by_id(17)) + + 
@pytest.mark.parametrize( + ( + "civic_assertion_fixture_name", + "submission_type_filter", + "should_raise_error", + ), + ( + [ + "aid202", + ClinVarSubmissionType.ONCOGENICITY, + False, + ], + [ + "aid202", + ClinVarSubmissionType.CLINICAL_IMPACT, + True, + ], + [ + "aid9", + ClinVarSubmissionType.CLINICAL_IMPACT, + False, + ], + [ + "aid9", + ClinVarSubmissionType.ONCOGENICITY, + True, + ], + [ + "aid20", + ClinVarSubmissionType.CLINICAL_IMPACT, + False, + ], + [ + "aid20", + ClinVarSubmissionType.ONCOGENICITY, + True, + ], + [ + "aid6", + ClinVarSubmissionType.CLINICAL_IMPACT, + False, + ], + [ + "aid6", + ClinVarSubmissionType.ONCOGENICITY, + True, + ], + ), + ) + def test_create_gks_record_from_assertion_filter( + self, + request, + civic_assertion_fixture_name, + submission_type_filter, + should_raise_error, + ): + """Test that create_gks_record_from_assertion works correctly when submission filter is applied""" + civic_aid = request.getfixturevalue(civic_assertion_fixture_name) + if should_raise_error: + with pytest.raises( + NotImplementedError, + match=rf"Assertion type {civic_aid.assertion_type} is not supported for ClinVar submission type {submission_type_filter.value}", + ): + create_gks_record_from_assertion( + civic_aid, submission_type_filter=submission_type_filter + ) + else: + assert create_gks_record_from_assertion( + civic_aid, submission_type_filter=submission_type_filter + ) + def test_clinvar_accession_ext(self): a = civic.get_assertion_by_id(193) - record = CivicGksAssertion(a, approval=a.approvals[0]) + record = create_gks_record_from_assertion(a, approval=a.approvals[0]) assert isinstance(record, VariantClinicalSignificanceStatement) assert [ext.model_dump(exclude_none=True) for ext in record.extensions] == [ {"name": "clinvar_accession", "value": "SCV007542591"} @@ -934,4 +1424,4 @@ def test_invalid(self, aid117): with pytest.raises( CivicGksRecordError, match=r"Assertion is not valid for GKS." 
): - CivicGksAssertion(aid117) + create_gks_record_from_assertion(aid117)