From 920fed9cb43c2c4b9012640d2bbbd75cda0cb652 Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Thu, 12 Feb 2026 22:45:22 -0800 Subject: [PATCH 1/4] start restructuring project ETLs in mpcontribs-kernel/notebooks --- mpcontribs-lux/mpcontribs/lux/pipelines.py | 42 +++++ .../lux/projects/attachments/__init__.py | 1 + .../lux/projects/attachments/schemas.py | 21 +++ .../mpcontribs/lux/projects/auph3/__init__.py | 1 + .../mpcontribs/lux/projects/auph3/schemas.py | 21 +++ .../lux/projects/barin_tables/__init__.py | 1 + .../lux/projects/barin_tables/schemas.py | 21 +++ .../lux/projects/bioi_defects/__init__.py | 1 + .../lux/projects/bioi_defects/pipelines.py | 28 ++++ .../lux/projects/bioi_defects/schemas.py | 21 +++ .../broberg_benchmark_defects/__init__.py | 1 + .../broberg_benchmark_defects/pipelines.py | 28 ++++ .../broberg_benchmark_defects/schemas.py | 21 +++ .../mpcontribs/lux/projects/cards/__init__.py | 1 + .../mpcontribs/lux/projects/cards/schemas.py | 21 +++ .../projects/carrier_transport/__init__.py | 1 + .../projects/carrier_transport/pipelines.py | 28 ++++ .../lux/projects/carrier_transport/schemas.py | 21 +++ .../defect_genome_pcfc_materials/__init__.py | 1 + .../defect_genome_pcfc_materials/pipelines.py | 28 ++++ .../defect_genome_pcfc_materials/schemas.py | 21 +++ .../lux/projects/delta_hvacancy/__init__.py | 1 + .../lux/projects/delta_hvacancy/pipelines.py | 28 ++++ .../lux/projects/delta_hvacancy/schemas.py | 21 +++ .../dilute_solute_diffusion/__init__.py | 1 + .../dilute_solute_diffusion/pipelines.py | 28 ++++ .../dilute_solute_diffusion/schemas.py | 21 +++ .../mpcontribs/lux/projects/dtu/__init__.py | 1 + .../mpcontribs/lux/projects/dtu/pipelines.py | 28 ++++ .../mpcontribs/lux/projects/dtu/schemas.py | 21 +++ .../ediffcrystalprediction/__init__.py | 1 + .../ediffcrystalprediction/pipelines.py | 28 ++++ .../ediffcrystalprediction/schemas.py | 21 +++ .../lux/projects/esters/__init__.py | 1 + .../lux/projects/esters/pipelines.py | 28 ++++ 
.../mpcontribs/lux/projects/esters/schemas.py | 21 +++ .../lux/projects/exp_xas/__init__.py | 1 + .../lux/projects/exp_xas/pipelines.py | 28 ++++ .../lux/projects/exp_xas/schemas.py | 21 +++ .../projects/experimental_thermo/__init__.py | 1 + .../projects/experimental_thermo/pipelines.py | 28 ++++ .../projects/experimental_thermo/schemas.py | 21 +++ .../experimental_thermoelectrics/__init__.py | 1 + .../experimental_thermoelectrics/pipelines.py | 28 ++++ .../experimental_thermoelectrics/schemas.py | 21 +++ .../lux/projects/ferroelectrics/__init__.py | 1 + .../lux/projects/ferroelectrics/pipelines.py | 28 ++++ .../lux/projects/ferroelectrics/schemas.py | 21 +++ .../forbidden_transitions/__init__.py | 1 + .../forbidden_transitions/pipelines.py | 28 ++++ .../projects/forbidden_transitions/schemas.py | 21 +++ .../mpcontribs/lux/projects/gbdb/__init__.py | 1 + .../mpcontribs/lux/projects/gbdb/pipelines.py | 28 ++++ .../mpcontribs/lux/projects/gbdb/schemas.py | 21 +++ .../lux/projects/hfp2023/__init__.py | 1 + .../lux/projects/hfp2023/pipelines.py | 28 ++++ .../lux/projects/hfp2023/schemas.py | 21 +++ .../lux/projects/intermatch/__init__.py | 1 + .../lux/projects/intermatch/pipelines.py | 28 ++++ .../lux/projects/intermatch/schemas.py | 21 +++ .../lux/projects/ion_ref_data/__init__.py | 1 + .../lux/projects/ion_ref_data/schemas.py | 21 +++ .../lux/projects/jarvis_dft/__init__.py | 1 + .../lux/projects/jarvis_dft/pipelines.py | 28 ++++ .../lux/projects/jarvis_dft/schemas.py | 21 +++ .../lux/projects/jarvis_dft_2023/__init__.py | 1 + .../lux/projects/jarvis_dft_2023/pipelines.py | 28 ++++ .../lux/projects/jarvis_dft_2023/schemas.py | 21 +++ .../lux/projects/matscholar/__init__.py | 1 + .../lux/projects/matscholar/pipelines.py | 28 ++++ .../lux/projects/matscholar/schemas.py | 21 +++ .../lux/projects/melting_points/__init__.py | 1 + .../lux/projects/melting_points/pipelines.py | 28 ++++ .../lux/projects/melting_points/schemas.py | 21 +++ 
.../mg_cathode_screening_2022/__init__.py | 1 + .../mg_cathode_screening_2022/pipelines.py | 28 ++++ .../mg_cathode_screening_2022/schemas.py | 21 +++ .../projects/mno2_phase_selection/__init__.py | 1 + .../mno2_phase_selection/pipelines.py | 28 ++++ .../projects/mno2_phase_selection/schemas.py | 21 +++ .../lux/projects/mofexplorer/__init__.py | 1 + .../lux/projects/mofexplorer/pipelines.py | 28 ++++ .../lux/projects/mofexplorer/schemas.py | 21 +++ .../lux/projects/ocp_update/__init__.py | 1 + .../lux/projects/ocp_update/pipelines.py | 28 ++++ .../lux/projects/ocp_update/schemas.py | 21 +++ .../lux/projects/ocp_upload/__init__.py | 1 + .../lux/projects/ocp_upload/pipelines.py | 28 ++++ .../lux/projects/ocp_upload/schemas.py | 21 +++ .../open_catalyst_project/__init__.py | 1 + .../open_catalyst_project/pipelines.py | 28 ++++ .../projects/open_catalyst_project/schemas.py | 21 +++ .../perovskites_diffusion/__init__.py | 1 + .../perovskites_diffusion/pipelines.py | 28 ++++ .../projects/perovskites_diffusion/schemas.py | 21 +++ .../lux/projects/pycroscopy/__init__.py | 1 + .../lux/projects/pycroscopy/pipelines.py | 28 ++++ .../lux/projects/pycroscopy/schemas.py | 21 +++ .../projects/pydatarecognition/__init__.py | 1 + .../projects/pydatarecognition/pipelines.py | 28 ++++ .../lux/projects/pydatarecognition/schemas.py | 21 +++ .../projects/qsgw_band_structures/__init__.py | 1 + .../projects/qsgw_band_structures/schemas.py | 21 +++ .../screening_inorganic_pv/__init__.py | 1 + .../screening_inorganic_pv/pipelines.py | 28 ++++ .../screening_inorganic_pv/schemas.py | 21 +++ .../lux/projects/silicon_defects/__init__.py | 1 + .../lux/projects/silicon_defects/pipelines.py | 28 ++++ .../lux/projects/silicon_defects/schemas.py | 21 +++ .../lux/projects/simple_test/__init__.py | 1 + .../lux/projects/simple_test/schemas.py | 21 +++ .../projects/springer_materials/__init__.py | 1 + .../projects/springer_materials/pipelines.py | 28 ++++ .../projects/springer_materials/schemas.py | 21 
+++ .../mpcontribs/lux/projects/swf/__init__.py | 1 + .../mpcontribs/lux/projects/swf/pipelines.py | 28 ++++ .../mpcontribs/lux/projects/swf/schemas.py | 21 +++ .../transparent_conductors/__init__.py | 1 + .../transparent_conductors/pipelines.py | 28 ++++ .../transparent_conductors/schemas.py | 21 +++ .../lux/projects/twodmatpedia/__init__.py | 21 +++ .../lux/projects/twodmatpedia/pipelines.py | 73 +++++++++ .../lux/projects/twodmatpedia/schemas.py | 153 ++++++++++++++++++ mpcontribs-lux/mpcontribs/lux/schemas.py | 68 ++++++++ 124 files changed, 2261 insertions(+) create mode 100644 mpcontribs-lux/mpcontribs/lux/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/attachments/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/attachments/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/auph3/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/auph3/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/barin_tables/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/barin_tables/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/cards/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/cards/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/pipelines.py create mode 100644 
mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dtu/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dtu/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/dtu/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/esters/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/esters/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/esters/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/exp_xas/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/exp_xas/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/exp_xas/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py create mode 100644 
mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py create mode 100644 
mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py create mode 100644 
mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py create mode 100644 
mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py create mode 100644 mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py create mode 100644 mpcontribs-lux/mpcontribs/lux/schemas.py diff --git a/mpcontribs-lux/mpcontribs/lux/pipelines.py b/mpcontribs-lux/mpcontribs/lux/pipelines.py new file mode 100644 index 000000000..9d3e641fc --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/pipelines.py @@ -0,0 +1,42 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from __future__ import annotations +from typing import TYPE_CHECKING + +from mpcontribs.client import Client as MPCClient +from mpcontribs.lux.schemas import ContributionRecord + +if TYPE_CHECKING: + from collections.abc import Iterable + + +class LuxETL: + """Perform basic extract, transform, load operations for MPContribs uploads.""" + + def __init__(self, project : str | None = None, client: MPCClient | None = None) -> None: + + if not project: + raise ValueError( + "Project name cannot be null or an empty string!" + ) + self.project = project + self.client = client or MPCClient(project=self.project) + + def extract(self) -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + def transform(self, raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + def aggregate(self, records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + def run(self) -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return self.aggregate(self.transform(self.extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/attachments/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/attachments/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/attachments/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/attachments/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/attachments/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/attachments/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/auph3/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/auph3/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/auph3/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/auph3/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/auph3/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/auph3/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/barin_tables/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/cards/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/cards/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/cards/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/cards/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/cards/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/cards/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dtu/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/dtu/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dtu/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dtu/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/dtu/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dtu/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/dtu/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/dtu/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/dtu/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/esters/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/esters/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/esters/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/esters/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/esters/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/esters/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/esters/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/esters/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/esters/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/exp_xas/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter); unknown fields are rejected.""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs (scaffold stub: returns []).""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records (scaffold stub: returns them unchanged).""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert each record dict into a ContributionRecord instance.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Run extract, transform, and aggregate in order; return the typed records.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py new file mode 100644 index 000000000..f4f7f5ce1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py @@ -0,0 +1,21 @@ +"""2dmatpedia project schemas and pipelines.""" + +from .pipelines import TwoDMatPediaETL +from .schemas import ( + DETAILS_URL, + INIT_COLUMNS, + PROJECT_DESCRIPTION, + PROJECT_LEGEND, + PROJECT_METADATA, + TwoDMatPediaRecord, +) + +__all__ = [ + "DETAILS_URL", + "INIT_COLUMNS", + "PROJECT_DESCRIPTION", + "PROJECT_LEGEND", + "PROJECT_METADATA", + "TwoDMatPediaETL", + "TwoDMatPediaRecord", +] diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py new file mode 100644 index 000000000..cd6c22068 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py @@ -0,0 +1,73 @@ +"""2dmatpedia ETL pipeline migrated from notebook logic.""" + +from __future__ import annotations + +import gzip +import json +from collections.abc import Iterable +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import TYPE_CHECKING +from urllib.request import urlretrieve + +from monty.json import MontyDecoder + +from mpcontribs.lux.pipelines import LuxETL + +from 
mpcontribs.lux.projects.twodmatpedia.schemas import DETAILS_URL, TwoDMatPediaRecord + +if TYPE_CHECKING: + from typing import Any + +class TwoDMatPediaETL(LuxETL): + """Extract, transform, and aggregate 2dmatpedia records.""" + + DB_JSON_URL = "http://www.2dmatpedia.org/static/db.json.gz" + + def extract(self) -> list[dict[str, Any]]: + """Download (if needed) and load raw JSONL records from db.json.gz.""" + + decoder = MontyDecoder() + with NamedTemporaryFile(suffix=".json.gz") as f: + urlretrieve(self.DB_JSON_URL, f.name) + + with gzip.open(self.db_file, "rb") as handle: + raw_records: list[dict[str, Any]] = [ + decoder.decode(line) + for line in handle + ] + return raw_records + + def transform(self, raw_records: Iterable[dict[str, Any]]) -> list[TwoDMatPediaRecord]: + """Filter to supported source prefixes and map to normalized dicts.""" + return [ + TwoDMatPediaRecord.from_raw(**raw) + for raw in records + ] + + def filter_existing(self, contributions: Iterable[TwoDMatPediaRecord]) -> list[TwoDMatPediaRecord]: + """Remove existing contributions based on the `details` data-id key.""" + existing = self.client.get_all_ids( + query={"project": self.project_name}, + data_id_fields={self.project_name: "details"}, + ).get(self.project_name, {}) + details_set = existing.get("details_set", set()) + + return [ + contribution + for contribution in contributions + if contribution.get("data", {}).get("details") not in details_set + ] + + def run(self, submit: bool = False, per_page: int = 30) -> list[TwoDMatPediaRecord]: + """Run ETL and optionally submit only missing records.""" + records = self.transform(self.extract()) + + if submit: + contributions = self.to_contributions(records) + if (missing := self.filter_existing(contributions)): + self.client.submit_contributions( + [record.to_contribs_entry() for record in missing] + ) + + return records diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py 
b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py new file mode 100644 index 000000000..4172c030b --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py @@ -0,0 +1,153 @@ +"""Schemas for the 2dmatpedia project ETL migration.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pydantic import Field + +from emmet.core.types.pymatgen_types.structure_adapter import StructureType + +from mpcontribs.lux.schemas import ContributionRecord + +if TYPE_CHECKING: + from typing import Any + +PROJECT_DESCRIPTION = """ +We start from the around 80000 inorganic compounds in the Materials Project database. A geometry-based +algorithm [PRL] was used to identify layered structures among these compounds. Two-dimensional (2D) +materials were theoretically exfoliated by extracting one cluster in the standard conventional unit cell +of the layered structures screened in the above steps. A 20 Å vacuum along the c axis was imposed to +minimize the interactions of image slabs by periodic condition. Structure matcher tools from Pymatgen were +used to find duplicates of the exfoliated 2D materials. The standard workflow developed by the Materials +Project was used to perform high-throughput calculations for all the layered bulk and 2D materials screened +in this project. The calculations were performed by density functional theory as implemented in the Vienna +Ab Initio Simulation Package (VASP) software with Perdew-Burke-Ernzerhof (PBE) approximation for the +exchange-correlation functional and the frozen-core all-electron projector-augmented wave (PAW) method for +the electron-ion interaction. The cutoff energy for the plane wave expansion was set to 520 eV. 
+""".strip() + +PROJECT_LEGEND = { + "details": "link to detail page on 2dMatPedia", + "source": "link to source material", + "process": "discovery process (top-down or bottom-up)", + "ΔE": "band gap", + "Eᵈ": "decomposition energy", + "Eˣ": "exfoliation energy", + "E": "energy", + "Eᵛᵈʷ": "van-der-Waals energy", + "µ": "total magnetization", +} + +PROJECT_METADATA = { + "is_public": True, + "title": "2DMatPedia", + "long_title": "2D Materials Encyclopedia", + "owner": "migueldiascosta@nus.edu.sg", + "authors": "M. Dias Costa, F.Y. Ping, Z. Jun", + "description": PROJECT_DESCRIPTION, + "references": [ + {"label": "WWW", "url": "http://www.2dmatpedia.org"}, + {"label": "PRL", "url": "https://doi.org/10.1103/PhysRevLett.118.106101"}, + ], +} + +DETAILS_URL = "http://www.2dmatpedia.org/2dmaterials/doc/" + +SOURCE_PREFIXES: set[str] = {"mp", "mvc", "2dm"} + +class TwoDMatPediaRecord(ContributionRecord): + """Validated 2dmatpedia source record.""" + + material_id: str = Field(description="2dmatpedia material identifier.") + source_id: str = Field(description="Source material identifier.") + discovery_process: str | None = Field( + None, description="Discovery process (top-down or bottom-up)." + ) + bandgap: float | None = Field(None, description="Band gap in eV.") + decomposition_energy: float | None = Field( + None, description="Decomposition energy in eV/atom." + ) + exfoliation_energy_per_atom: float | None = Field( + None, description="Exfoliation energy in eV/atom." + ) + energy_per_atom: float | None = Field(None, description="Energy in eV/atom.") + energy_vdw_per_atom: float | None = Field( + None, description="Van-der-Waals energy in eV/atom." + ) + total_magnetization: float | None = Field( + None, description="Total magnetization in Bohr magnetons." 
+ ) + + units : dict[str,str] = { + "bandgap": "eV", + "decomposition_energy": "eV/atom", + "exfoliation_energy_per_atom": "eV/atom", + "energy_per_atom": "eV/atom", + "energy_vdw_per_atom": "eV/atom", + "total_magnetization": "mu_B", + } + + @classmethod + def from_raw( + cls, raw: dict[str, Any], details_url: str = DETAILS_URL + ) -> TwoDMatPediaRecord | None: + """Create a validated record from one JSON line in db.json.gz.""" + source_id = raw.get("source_id") + material_id = raw.get("material_id") + if not source_id or not material_id: + return None + + if (prefix := str(source_id).split("-", 1)[0]) not in SOURCE_PREFIXES: + return None + + return cls( + identifier=( + material_id if prefix == "2dm" else source_id + ) or None, + formula=getattr(raw.get("structure"),"formula",None), + material_id=material_id, + source_id=source_id, + discovery_process=raw.get("discovery_process"), + bandgap=raw.get("bandgap"), + decomposition_energy=raw.get("decomposition_energy"), + exfoliation_energy_per_atom=raw.get("exfoliation_energy_per_atom"), + energy_per_atom=raw.get("energy_per_atom"), + energy_vdw_per_atom=raw.get("energy_vdw_per_atom"), + total_magnetization=raw.get("total_magnetization"), + structures=[raw.get("structure")] if raw.get("structure") else [], + ) + + def to_data_payload(self, details_url: str = DETAILS_URL) -> dict[str, str]: + """Convert normalized fields to the notebook's MPContribs data map.""" + payload: dict[str, str] = {"details": f"{details_url}{self.material_id}"} + + if self.discovery_process: + payload["process"] = self.discovery_process + + for key, value in ( + ("ΔE", self._with_unit(self.bandgap, "eV")), + ("Eᵈ", self._with_unit(self.decomposition_energy, "eV/atom")), + ("Eˣ", self._with_unit(self.exfoliation_energy_per_atom, "eV/atom")), + ("E", self._with_unit(self.energy_per_atom, "eV/atom")), + ("Eᵛᵈʷ", self._with_unit(self.energy_vdw_per_atom, "eV/atom")), + ("µ", self._with_unit(self.total_magnetization, "µᵇ")), + ): + if value is 
not None: + payload[key] = value + + return payload + + +INIT_COLUMNS = { + "details": None, + "source": None, + "process": None, + "ΔE": "eV", + "Eᵈ": "eV/atom", + "Eˣ": "eV/atom", + "E": "eV/atom", + "Eᵛᵈʷ": "eV/atom", + "µ": "µᵇ", + "structures": None, +} diff --git a/mpcontribs-lux/mpcontribs/lux/schemas.py b/mpcontribs-lux/mpcontribs/lux/schemas.py new file mode 100644 index 000000000..2ddd81d08 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/schemas.py @@ -0,0 +1,68 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field, model_serializer +from typing import TYPE_CHECKING, get_args + +from emmet.core.types.pymatgen_types.structure_adapter import StructureType + +if TYPE_CHECKING: + from typing_extensions import Self + from typing import Any + +NON_DATA_FIELDS = {"identifier","formula","units","aliases","structures","attachments"} + +class AttachmentRecord(BaseModel): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") + +class ContributionRecord(BaseModel): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(None, description="Reduced chemical formula") + + units : dict[str,str] = Field({},description="mapping of column names to units.",exclude=True) + structures : list[StructureType] = Field([], description="Structures associated with this entry.") + attachments : list[AttachmentRecord] = Field([]) + aliases : dict[str,str] = Field({}, description="Aliases of fields to use when generating column names.", exclude=True) + + def to_contribs_entry(self) -> dict[str,Any]: + """Format this entry as an MPContribs compatible 
entry.""" + return { + "identifier": self.identifier, + "formula": self.formula, + "data": { + k : f"{getattr(self,k,None)} {self.units.get(k,'')}".strip() + for k in set( + self.__class__.model_fields + ).difference(NON_DATA_FIELDS) + }, + "structures": self.structures, + "attachments": self.attachments, + } + + @property + def columns(self) -> dict[str,str]: + return { + k : self.units.get(k) or ( + "" if any(t in get_args(field.annotation) for t in (int,float)) else None + ) + for k, field in self.__class__.model_fields.items() + if k not in NON_DATA_FIELDS + } + + @property + def metadata(self) -> dict[str,str]: + return { + k : field.description + for k, field in self.__class__.model_fields.items() + if k not in NON_DATA_FIELDS and field.description + } \ No newline at end of file From 1e2e5e40ee2fcfd5c7accfab4ab8210291a4bf56 Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Fri, 13 Feb 2026 09:28:31 -0800 Subject: [PATCH 2/4] clean up ETL pipeline ensure it works for 2dmatpedia data --- mpcontribs-lux/mpcontribs/lux/pipelines.py | 59 ++++++++++++++----- .../lux/projects/twodmatpedia/pipelines.py | 27 ++++----- .../lux/projects/twodmatpedia/schemas.py | 46 +++++---------- mpcontribs-lux/mpcontribs/lux/schemas.py | 54 +++++++++++------ 4 files changed, 107 insertions(+), 79 deletions(-) diff --git a/mpcontribs-lux/mpcontribs/lux/pipelines.py b/mpcontribs-lux/mpcontribs/lux/pipelines.py index 9d3e641fc..f6cb58833 100644 --- a/mpcontribs-lux/mpcontribs/lux/pipelines.py +++ b/mpcontribs-lux/mpcontribs/lux/pipelines.py @@ -6,7 +6,7 @@ from __future__ import annotations from typing import TYPE_CHECKING -from mpcontribs.client import Client as MPCClient +from mpcontribs.client import Client as MPCClient, MPContribsClientError from mpcontribs.lux.schemas import ContributionRecord if TYPE_CHECKING: @@ -16,27 +16,54 @@ class LuxETL: """Perform basic extract, transform, load operations for MPContribs uploads.""" - def __init__(self, project : str | None = None, 
client: MPCClient | None = None) -> None: - - if not project: - raise ValueError( - "Project name cannot be null or an empty string!" - ) - self.project = project + project: str + schema: ContributionRecord | None = None + + def __init__(self, client: MPCClient | None = None, **kwargs) -> None: + self.client = client or MPCClient(project=self.project) + @classmethod + def init_project(cls, **kwargs) -> MPCClient: + try: + client = MPCClient(project=cls.project) + + except MPContribsClientError: + with MPCClient() as client: + mpr.contribs.create_project( + name=cls.project, + **kwargs, + ) + client = MPCClient(project=cls.project) + return client + + @classmethod + def init_columns_and_meta( + cls, unique_identifiers: bool | None = None, **kwargs + ) -> None: + if not cls.schema: + raise ValueError("No schema provided to initialize columns") + + client = cls.init_project(**kwargs) + client.init_columns(cls.schema.columns) + + meta = { + "other": cls.schema.metadata, + } + if unique_identifiers is not None: + meta["unique_identifiers"] = True + client.update_project(meta) + def extract(self) -> Iterable[dict]: """Load raw records from source files/APIs.""" return [] - def transform(self, raw_records: Iterable[dict]) -> Iterable[dict]: - """Normalize and clean raw records.""" + def transform(self, raw_records: Iterable[dict]) -> Iterable[dict[str, Any]]: + """Normalize, clean, and validate raw records.""" + if self.schema: + return [self.schema(**raw).to_contribs_entry() for raw in raw_records] return raw_records - def aggregate(self, records: Iterable[dict]) -> list[ContributionRecord]: - """Validate and convert records into typed schema instances.""" - return [ContributionRecord(**record) for record in records] - - def run(self) -> list[ContributionRecord]: + def run(self) -> None: """Execute the default local pipeline.""" - return self.aggregate(self.transform(self.extract())) + self.transform(self.extract()) diff --git 
a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py index cd6c22068..a6529c67e 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py @@ -19,33 +19,33 @@ if TYPE_CHECKING: from typing import Any + class TwoDMatPediaETL(LuxETL): """Extract, transform, and aggregate 2dmatpedia records.""" - DB_JSON_URL = "http://www.2dmatpedia.org/static/db.json.gz" + project: str = "2dmatpedia" + DB_JSON_URL: str = "http://www.2dmatpedia.org/static/db.json.gz" def extract(self) -> list[dict[str, Any]]: """Download (if needed) and load raw JSONL records from db.json.gz.""" - + decoder = MontyDecoder() with NamedTemporaryFile(suffix=".json.gz") as f: urlretrieve(self.DB_JSON_URL, f.name) - with gzip.open(self.db_file, "rb") as handle: + with gzip.open(f.name, "rb") as handle: raw_records: list[dict[str, Any]] = [ - decoder.decode(line) - for line in handle + decoder.decode(line) for line in handle ] return raw_records - def transform(self, raw_records: Iterable[dict[str, Any]]) -> list[TwoDMatPediaRecord]: + def transform(self, raw_records: Iterable[dict[str, Any]]) -> list[dict[str, Any]]: """Filter to supported source prefixes and map to normalized dicts.""" - return [ - TwoDMatPediaRecord.from_raw(**raw) - for raw in records - ] + return [TwoDMatPediaRecord(**raw).to_contribs_entry() for raw in raw_records] - def filter_existing(self, contributions: Iterable[TwoDMatPediaRecord]) -> list[TwoDMatPediaRecord]: + def filter_existing( + self, contributions: Iterable[TwoDMatPediaRecord] + ) -> list[dict[str, Any]]: """Remove existing contributions based on the `details` data-id key.""" existing = self.client.get_all_ids( query={"project": self.project_name}, @@ -59,13 +59,12 @@ def filter_existing(self, contributions: Iterable[TwoDMatPediaRecord]) -> list[T if contribution.get("data", {}).get("details") not in 
details_set ] - def run(self, submit: bool = False, per_page: int = 30) -> list[TwoDMatPediaRecord]: + def run(self, submit: bool = False, per_page: int = 30) -> list[dict[str, Any]]: """Run ETL and optionally submit only missing records.""" records = self.transform(self.extract()) if submit: - contributions = self.to_contributions(records) - if (missing := self.filter_existing(contributions)): + if missing := self.filter_existing(contributions): self.client.submit_contributions( [record.to_contribs_entry() for record in missing] ) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py index 4172c030b..364bff27e 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING -from pydantic import Field +from pydantic import Field, model_validator from emmet.core.types.pymatgen_types.structure_adapter import StructureType @@ -56,11 +56,13 @@ SOURCE_PREFIXES: set[str] = {"mp", "mvc", "2dm"} + class TwoDMatPediaRecord(ContributionRecord): """Validated 2dmatpedia source record.""" - material_id: str = Field(description="2dmatpedia material identifier.") source_id: str = Field(description="Source material identifier.") + + material_id: str | None = Field(None, description="2dmatpedia material identifier.") discovery_process: str | None = Field( None, description="Discovery process (top-down or bottom-up)." ) @@ -79,7 +81,7 @@ class TwoDMatPediaRecord(ContributionRecord): None, description="Total magnetization in Bohr magnetons." 
) - units : dict[str,str] = { + units: dict[str, str] = { "bandgap": "eV", "decomposition_energy": "eV/atom", "exfoliation_energy_per_atom": "eV/atom", @@ -88,35 +90,15 @@ class TwoDMatPediaRecord(ContributionRecord): "total_magnetization": "mu_B", } - @classmethod - def from_raw( - cls, raw: dict[str, Any], details_url: str = DETAILS_URL - ) -> TwoDMatPediaRecord | None: - """Create a validated record from one JSON line in db.json.gz.""" - source_id = raw.get("source_id") - material_id = raw.get("material_id") - if not source_id or not material_id: - return None - - if (prefix := str(source_id).split("-", 1)[0]) not in SOURCE_PREFIXES: - return None - - return cls( - identifier=( - material_id if prefix == "2dm" else source_id - ) or None, - formula=getattr(raw.get("structure"),"formula",None), - material_id=material_id, - source_id=source_id, - discovery_process=raw.get("discovery_process"), - bandgap=raw.get("bandgap"), - decomposition_energy=raw.get("decomposition_energy"), - exfoliation_energy_per_atom=raw.get("exfoliation_energy_per_atom"), - energy_per_atom=raw.get("energy_per_atom"), - energy_vdw_per_atom=raw.get("energy_vdw_per_atom"), - total_magnetization=raw.get("total_magnetization"), - structures=[raw.get("structure")] if raw.get("structure") else [], - ) + @model_validator(mode="before") + def set_identifier(cls, config: Any): + if not config.get("identifier"): + prefix = config["source_id"] + mpid = config.get("material_id") + config["identifier"] = ( + mpid if (mpid and prefix == "2dm") else config["source_id"] + ) + return config def to_data_payload(self, details_url: str = DETAILS_URL) -> dict[str, str]: """Convert normalized fields to the notebook's MPContribs data map.""" diff --git a/mpcontribs-lux/mpcontribs/lux/schemas.py b/mpcontribs-lux/mpcontribs/lux/schemas.py index 2ddd81d08..a7b3daa06 100644 --- a/mpcontribs-lux/mpcontribs/lux/schemas.py +++ b/mpcontribs-lux/mpcontribs/lux/schemas.py @@ -14,7 +14,15 @@ from typing_extensions import 
Self from typing import Any -NON_DATA_FIELDS = {"identifier","formula","units","aliases","structures","attachments"} +NON_DATA_FIELDS = { + "identifier", + "formula", + "units", + "aliases", + "structures", + "attachments", +} + class AttachmentRecord(BaseModel): """Attachment metadata schema (starter).""" @@ -23,46 +31,58 @@ class AttachmentRecord(BaseModel): name: str = Field(description="Attachment logical name") mime_type: str | None = Field(default=None, description="Attachment MIME type") + class ContributionRecord(BaseModel): """Core contribution row schema (starter).""" identifier: str = Field(description="Contribution identifier") formula: str | None = Field(None, description="Reduced chemical formula") - units : dict[str,str] = Field({},description="mapping of column names to units.",exclude=True) - structures : list[StructureType] = Field([], description="Structures associated with this entry.") - attachments : list[AttachmentRecord] = Field([]) - aliases : dict[str,str] = Field({}, description="Aliases of fields to use when generating column names.", exclude=True) + units: dict[str, str] = Field( + {}, description="mapping of column names to units.", exclude=True + ) + structures: list[StructureType] = Field( + [], description="Structures associated with this entry." 
+ ) + attachments: list[AttachmentRecord] = Field([]) + aliases: dict[str, str] = Field( + {}, + description="Aliases of fields to use when generating column names.", + exclude=True, + ) - def to_contribs_entry(self) -> dict[str,Any]: + def to_contribs_entry(self) -> dict[str, Any]: """Format this entry as an MPContribs compatible entry.""" return { "identifier": self.identifier, "formula": self.formula, "data": { - k : f"{getattr(self,k,None)} {self.units.get(k,'')}".strip() - for k in set( - self.__class__.model_fields - ).difference(NON_DATA_FIELDS) + self.aliases.get( + k, k + ): f"{getattr(self,k,None)} {self.units.get(k,'')}".strip() + for k in set(self.__class__.model_fields).difference(NON_DATA_FIELDS) }, "structures": self.structures, "attachments": self.attachments, } @property - def columns(self) -> dict[str,str]: + def columns(self) -> dict[str, str]: return { - k : self.units.get(k) or ( - "" if any(t in get_args(field.annotation) for t in (int,float)) else None + self.aliases.get(k, k): self.units.get(k) + or ( + "" + if any(t in get_args(field.annotation) for t in (int, float)) + else None ) for k, field in self.__class__.model_fields.items() if k not in NON_DATA_FIELDS } - + @property - def metadata(self) -> dict[str,str]: + def metadata(self) -> dict[str, str]: return { - k : field.description + self.aliases.get(k, k): field.description for k, field in self.__class__.model_fields.items() if k not in NON_DATA_FIELDS and field.description - } \ No newline at end of file + } From 8572e7d2239e07948da5a357c0069da51c449891 Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Fri, 13 Feb 2026 10:35:48 -0800 Subject: [PATCH 3/4] cleanup + transparent conductors --- .../transparent_conductors/__init__.py | 20 +- .../transparent_conductors/pipelines.py | 45 +-- .../transparent_conductors/schemas.py | 288 +++++++++++++++++- .../lux/projects/twodmatpedia/__init__.py | 19 +- .../lux/projects/twodmatpedia/pipelines.py | 35 +-- 
.../lux/projects/twodmatpedia/schemas.py | 59 ---- 6 files changed, 324 insertions(+), 142 deletions(-) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py index 4a6ebcff1..bcc2d0aa3 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py @@ -1 +1,19 @@ -"""Project module scaffold.""" +"""transparent_conductors project schemas and pipelines.""" + +from .pipelines import TransparentConductorsETL +from .schemas import ( + GOOGLE_SHEET_ID, + GOOGLE_SHEET_URL, + PROJECT_NAME, + SHEETS, + TransparentConductorRecord, +) + +__all__ = [ + "GOOGLE_SHEET_ID", + "GOOGLE_SHEET_URL", + "PROJECT_NAME", + "SHEETS", + "TransparentConductorsETL", + "TransparentConductorRecord", +] diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py index a96b1591c..f5bdeda82 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py @@ -1,28 +1,39 @@ -"""Data pipeline scaffold for this project. +"""transparent_conductors ETL pipeline migrated from notebook logic.""" -Implement extraction, transformation, aggregation, and optional upload helpers here. 
-""" +from __future__ import annotations -from collections.abc import Iterable +from typing import TYPE_CHECKING -from .schemas import ContributionRecord +from pandas import read_excel +from mpcontribs.lux.pipelines import LuxETL -def extract() -> Iterable[dict]: - """Load raw records from source files/APIs.""" - return [] +from .schemas import GOOGLE_SHEET_URL, PROJECT_NAME, SHEETS, TransparentConductorRecord +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import Any + from mpcontribs.lux.schemas import ContributionRecord -def transform(raw_records: Iterable[dict]) -> Iterable[dict]: - """Normalize and clean raw records.""" - return raw_records +class TransparentConductorsETL(LuxETL): + """Extract and transform transparent conductor entries from the source workbook.""" -def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: - """Validate and convert records into typed schema instances.""" - return [ContributionRecord(**record) for record in records] + project = PROJECT_NAME + schema = TransparentConductorRecord + def extract(self) -> list[dict[str, Any]]: + """Read workbook sheets and parse each row into schema-ready dicts.""" + records: list[dict[str, Any]] = [] + for sheet_name in SHEETS: + doping = sheet_name.split(" ")[0] + df = read_excel(GOOGLE_SHEET_URL, sheet_name=sheet_name, header=[0, 1, 2]) -def run() -> list[ContributionRecord]: - """Execute the default local pipeline.""" - return aggregate(transform(extract())) + records += [row for row in df.to_dict(orient="records")] + return records + + def transform(self, raw_records: Iterable[dict]) -> Iterable[dict[str, Any]]: + """Normalize, clean, and validate raw records.""" + return [ + self.schema.from_sheet_row(raw).to_contribs_entry() for raw in raw_records + ] diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py index f7f497df5..a197e3c3e 100644 --- 
a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py @@ -1,21 +1,283 @@ -"""Pydantic schemas for this project. +"""Schemas for the transparent_conductors project ETL migration.""" -Replace these starter models with project-specific, fully documented schemas. -""" +from __future__ import annotations -from pydantic import BaseModel, Field +import math +from typing import Any +from pydantic import Field -class ContributionRecord(BaseModel, extra="forbid"): - """Core contribution row schema (starter).""" +from mpcontribs.lux.schemas import NON_DATA_FIELDS, ContributionRecord - identifier: str = Field(description="Contribution identifier") - formula: str | None = Field(default=None, description="Reduced chemical formula") +PROJECT_NAME = "transparent_conductors" +GOOGLE_SHEET_ID = "1bgQAdSfyrPEDI4iljwWlkyUPt_mo84jWr4N_1DKQDUI" +GOOGLE_SHEET_URL = ( + f"https://docs.google.com/spreadsheets/d/{GOOGLE_SHEET_ID}/export?format=xlsx" +) +SHEETS = ("n-type TCs", "p-type TCs") +RAW_TO_FIELD = { + "doping": "doping", + "number of studies": "studies", + "quality.good or ok": "quality", + "structure and composition.common dopants": "dopants", + "structure and composition.space group symbol": "spacegroup", + "branch point energy.bpe min ratio": "bpe_ratio_min", + "branch point energy.bpe max ratio": "bpe_ratio_max", + "branch point energy.bpe ratio": "bpe_ratio_mean", + "branch point energy.has degenerate bands": "bpe_degenerate", + "computed gap.hse06 band gap": "computed_gap_hse06_band", + "computed gap.hse06 direct gap": "computed_gap_hse06_direct", + "computed gap.pbe band gap": "computed_gap_pbe_band", + "computed gap.pbe direct gap": "computed_gap_pbe_direct", + "computed m*.conditions": "computed_mstar_conditions", + "computed m*.m* avg": "computed_mstar_average", + "computed m*.m* planar": "computed_mstar_planar", + "computed stability.e_above_hull": 
"computed_stability_e_hull", + "computed stability.e_above_pourbaix_hull": "computed_stability_e_pourbaix_hull", + "experimental doping type": "experimental_doping", + "experimental gap.max experimental gap": "experimental_gap_range_max", + "experimental gap.max gap reference": "experimental_gap_reference_max", + "experimental gap.min experimental gap": "experimental_gap_range_min", + "experimental gap.min gap reference": "experimental_gap_reference_min", + "max experimental conductivity.associated carrier concentration": "experimental_conductivity_concentration", + "max experimental conductivity.dopant": "experimental_conductivity_dopant", + "max experimental conductivity.max conductivity": "experimental_conductivity_max", + "max experimental conductivity.reference link": "experimental_conductivity_reference", + "max experimental conductivity.synthesis method": "experimental_conductivity_method", + "max experimental mobility.dopant": "experimental_mobility_dopant", + "max experimental mobility.max mobility": "experimental_mobility_max", + "max experimental mobility.reference link": "experimental_mobility_reference", + "max experimental mobility.synthesis method": "experimental_mobility_method", +} -class AttachmentRecord(BaseModel, extra="forbid"): - """Attachment metadata schema (starter).""" +ALIASES = { + "studies": "studies", + "quality": "quality", + "dopants": "dopants", + "spacegroup": "spacegroup", + "bpe_ratio_min": "BPE.ratio.min", + "bpe_ratio_max": "BPE.ratio.max", + "bpe_ratio_mean": "BPE.ratio.mean", + "bpe_degenerate": "BPE.degenerate", + "computed_gap_hse06_band": "computed.gap.HSE06.band", + "computed_gap_hse06_direct": "computed.gap.HSE06.direct", + "computed_gap_pbe_band": "computed.gap.PBE.band", + "computed_gap_pbe_direct": "computed.gap.PBE.direct", + "computed_mstar_conditions": "computed.m*.conditions", + "computed_mstar_average": "computed.m*.average", + "computed_mstar_planar": "computed.m*.planar", + "computed_stability_e_hull": 
"computed.stability.Eₕ", + "computed_stability_e_pourbaix_hull": "computed.stability.Eₚₕ", + "experimental_doping": "experimental.doping", + "experimental_gap_range_max": "experimental.gap.range.max", + "experimental_gap_reference_max": "experimental.gap.references.max", + "experimental_gap_range_min": "experimental.gap.range.min", + "experimental_gap_reference_min": "experimental.gap.references.min", + "experimental_conductivity_concentration": "experimental.conductivity.concentration", + "experimental_conductivity_dopant": "experimental.conductivity.dopant", + "experimental_conductivity_max": "experimental.conductivity.max", + "experimental_conductivity_reference": "experimental.conductivity.reference", + "experimental_conductivity_method": "experimental.conductivity.method", + "experimental_mobility_dopant": "experimental.mobility.dopant", + "experimental_mobility_max": "experimental.mobility.max", + "experimental_mobility_reference": "experimental.mobility.reference", + "experimental_mobility_method": "experimental.mobility.method", +} - identifier: str = Field(description="Contribution identifier") - name: str = Field(description="Attachment logical name") - mime_type: str | None = Field(default=None, description="Attachment MIME type") +UNITS = { + "studies": "", + "bpe_ratio_min": "", + "bpe_ratio_max": "", + "bpe_ratio_mean": "", + "computed_gap_hse06_band": "eV", + "computed_gap_hse06_direct": "eV", + "computed_gap_pbe_band": "eV", + "computed_gap_pbe_direct": "eV", + "computed_mstar_average": "", + "computed_mstar_planar": "", + "computed_stability_e_hull": "eV", + "computed_stability_e_pourbaix_hull": "eV", + "experimental_gap_range_max": "eV", + "experimental_gap_range_min": "eV", + "experimental_conductivity_concentration": "cm⁻³", + "experimental_conductivity_max": "S/cm", + "experimental_mobility_max": "cm²/V/s", +} + + +def _is_nan(value: Any) -> bool: + return isinstance(value, float) and math.isnan(value) + + +class 
TransparentConductorRecord(ContributionRecord): + """Typed entry for one transparent conductor contribution.""" + + aliases: dict[str, str] = ALIASES + units: dict[str, str] = UNITS + + doping: str = Field( + description="Doping class from source sheet (n-type or p-type)." + ) + studies: int | float | None = Field(None, description="Number of studies.") + quality: str | None = Field(None, description="Quality label (good or ok).") + dopants: str | None = Field(None, description="Common dopants.") + spacegroup: str | None = Field(None, description="Space group symbol.") + + bpe_ratio_min: float | int | str | None = Field( + None, description="Minimum BPE ratio." + ) + bpe_ratio_max: float | int | str | None = Field( + None, description="Maximum BPE ratio." + ) + bpe_ratio_mean: float | int | str | None = Field( + None, description="Mean BPE ratio." + ) + bpe_degenerate: str | bool | None = Field( + None, description="Whether degenerate bands exist." + ) + + computed_gap_hse06_band: float | int | str | None = Field( + None, description="Computed HSE06 band gap." + ) + computed_gap_hse06_direct: float | int | str | None = Field( + None, description="Computed HSE06 direct gap." + ) + computed_gap_pbe_band: float | int | str | None = Field( + None, description="Computed PBE band gap." + ) + computed_gap_pbe_direct: float | int | str | None = Field( + None, description="Computed PBE direct gap." + ) + computed_mstar_conditions: str | None = Field( + None, description="Effective mass conditions." + ) + computed_mstar_average: float | int | str | None = Field( + None, description="Average effective mass." + ) + computed_mstar_planar: float | int | str | None = Field( + None, description="Planar effective mass." + ) + computed_stability_e_hull: float | int | str | None = Field( + None, description="Energy above hull." + ) + computed_stability_e_pourbaix_hull: float | int | str | None = Field( + None, description="Energy above Pourbaix hull." 
+ ) + + experimental_doping: str | None = Field( + None, description="Experimental doping type." + ) + experimental_gap_range_max: float | int | str | None = Field( + None, description="Maximum experimental gap." + ) + experimental_gap_reference_max: str | None = Field( + None, description="Reference for max experimental gap." + ) + experimental_gap_range_min: float | int | str | None = Field( + None, description="Minimum experimental gap." + ) + experimental_gap_reference_min: str | None = Field( + None, description="Reference for min experimental gap." + ) + + experimental_conductivity_concentration: float | int | str | None = Field( + None, description="Carrier concentration at max conductivity." + ) + experimental_conductivity_dopant: str | None = Field( + None, description="Dopant for max conductivity." + ) + experimental_conductivity_max: float | int | str | None = Field( + None, description="Maximum experimental conductivity." + ) + experimental_conductivity_reference: str | None = Field( + None, description="Reference for conductivity data." + ) + experimental_conductivity_method: str | None = Field( + None, description="Synthesis method for conductivity." + ) + + experimental_mobility_dopant: str | None = Field( + None, description="Dopant for max mobility." + ) + experimental_mobility_max: float | int | str | None = Field( + None, description="Maximum experimental mobility." + ) + experimental_mobility_reference: str | None = Field( + None, description="Reference for mobility data." + ) + experimental_mobility_method: str | None = Field( + None, description="Synthesis method for mobility." 
+ ) + + @classmethod + def _clean_header_key(cls, keys: tuple[str, ...]) -> str: + key = ".".join( + [k.replace("TC", "").strip() for k in keys if not k.startswith("Unnamed:")] + ) + if key.endswith("experimental doping type"): + key = key.replace("Transport.", "") + key_split = key.split(".") + if len(key_split) > 2: + key = ".".join(key_split[1:]) + if key.endswith("google scholar"): + key = key.replace(".google scholar", "") + return key + + @staticmethod + def _normalize_unit(unit: str) -> str: + unit = unit.replace("^-3", "⁻³").replace("^20", "²⁰") + unit = unit.replace("V2/cms", "cm²/V/s").replace("cm^2/Vs", "cm²/V/s") + return unit + + @classmethod + def from_sheet_row( + cls, row: dict[tuple[str, ...], Any], doping: str + ) -> TransparentConductorRecord | None: + """Convert one spreadsheet row to a validated transparent conductor record.""" + identifier: str | None = None + extracted: dict[str, Any] = {"doping": doping} + + for keys, value in row.items(): + key = cls._clean_header_key(keys) + if key.endswith("MP link") or key.endswith("range"): + continue + + if key == "Material.mpid": + if identifier is None: + if _is_nan(value): + return None + identifier = str(value).strip() + continue + + if key == "Material.p pretty formula": + key = "formula" + + if isinstance(value, str): + normalized: Any = value.strip() + else: + if _is_nan(value): + continue + if key.endswith(")"): + key, unit = key.rsplit(" (", 1) + unit = cls._normalize_unit(unit[:-1]) + if "," in unit: + extra_key = key.rsplit(".", 1)[0].lower() + ".conditions" + extracted[extra_key] = unit + normalized = value + + if normalized in ("", None): + continue + + clean_key = key.replace(" for VB:CB = 4:2", "").replace("?", "").lower() + extracted[clean_key] = normalized + + if not identifier: + return None + + fields = {"identifier": identifier, "formula": extracted.get("formula")} + for raw_key, field_name in RAW_TO_FIELD.items(): + if raw_key in extracted: + fields[field_name] = extracted[raw_key] 
+ + return cls(**fields) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py index f4f7f5ce1..5b720b527 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py @@ -1,21 +1,4 @@ """2dmatpedia project schemas and pipelines.""" from .pipelines import TwoDMatPediaETL -from .schemas import ( - DETAILS_URL, - INIT_COLUMNS, - PROJECT_DESCRIPTION, - PROJECT_LEGEND, - PROJECT_METADATA, - TwoDMatPediaRecord, -) - -__all__ = [ - "DETAILS_URL", - "INIT_COLUMNS", - "PROJECT_DESCRIPTION", - "PROJECT_LEGEND", - "PROJECT_METADATA", - "TwoDMatPediaETL", - "TwoDMatPediaRecord", -] +from .schemas import TwoDMatPediaRecord diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py index a6529c67e..f843a0c89 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py @@ -13,8 +13,7 @@ from monty.json import MontyDecoder from mpcontribs.lux.pipelines import LuxETL - -from mpcontribs.lux.projects.twodmatpedia.schemas import DETAILS_URL, TwoDMatPediaRecord +from mpcontribs.lux.projects.twodmatpedia.schemas import TwoDMatPediaRecord if TYPE_CHECKING: from typing import Any @@ -38,35 +37,3 @@ def extract(self) -> list[dict[str, Any]]: decoder.decode(line) for line in handle ] return raw_records - - def transform(self, raw_records: Iterable[dict[str, Any]]) -> list[dict[str, Any]]: - """Filter to supported source prefixes and map to normalized dicts.""" - return [TwoDMatPediaRecord(**raw).to_contribs_entry() for raw in raw_records] - - def filter_existing( - self, contributions: Iterable[TwoDMatPediaRecord] - ) -> list[dict[str, Any]]: - """Remove existing contributions based on the `details` data-id key.""" - existing = 
self.client.get_all_ids( - query={"project": self.project_name}, - data_id_fields={self.project_name: "details"}, - ).get(self.project_name, {}) - details_set = existing.get("details_set", set()) - - return [ - contribution - for contribution in contributions - if contribution.get("data", {}).get("details") not in details_set - ] - - def run(self, submit: bool = False, per_page: int = 30) -> list[dict[str, Any]]: - """Run ETL and optionally submit only missing records.""" - records = self.transform(self.extract()) - - if submit: - if missing := self.filter_existing(contributions): - self.client.submit_contributions( - [record.to_contribs_entry() for record in missing] - ) - - return records diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py index 364bff27e..860992554 100644 --- a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py @@ -27,20 +27,7 @@ the electron-ion interaction. The cutoff energy for the plane wave expansion was set to 520 eV. 
""".strip() -PROJECT_LEGEND = { - "details": "link to detail page on 2dMatPedia", - "source": "link to source material", - "process": "discovery process (top-down or bottom-up)", - "ΔE": "band gap", - "Eᵈ": "decomposition energy", - "Eˣ": "exfoliation energy", - "E": "energy", - "Eᵛᵈʷ": "van-der-Waals energy", - "µ": "total magnetization", -} - PROJECT_METADATA = { - "is_public": True, "title": "2DMatPedia", "long_title": "2D Materials Encyclopedia", "owner": "migueldiascosta@nus.edu.sg", @@ -52,8 +39,6 @@ ], } -DETAILS_URL = "http://www.2dmatpedia.org/2dmaterials/doc/" - SOURCE_PREFIXES: set[str] = {"mp", "mvc", "2dm"} @@ -89,47 +74,3 @@ class TwoDMatPediaRecord(ContributionRecord): "energy_vdw_per_atom": "eV/atom", "total_magnetization": "mu_B", } - - @model_validator(mode="before") - def set_identifier(cls, config: Any): - if not config.get("identifier"): - prefix = config["source_id"] - mpid = config.get("material_id") - config["identifier"] = ( - mpid if (mpid and prefix == "2dm") else config["source_id"] - ) - return config - - def to_data_payload(self, details_url: str = DETAILS_URL) -> dict[str, str]: - """Convert normalized fields to the notebook's MPContribs data map.""" - payload: dict[str, str] = {"details": f"{details_url}{self.material_id}"} - - if self.discovery_process: - payload["process"] = self.discovery_process - - for key, value in ( - ("ΔE", self._with_unit(self.bandgap, "eV")), - ("Eᵈ", self._with_unit(self.decomposition_energy, "eV/atom")), - ("Eˣ", self._with_unit(self.exfoliation_energy_per_atom, "eV/atom")), - ("E", self._with_unit(self.energy_per_atom, "eV/atom")), - ("Eᵛᵈʷ", self._with_unit(self.energy_vdw_per_atom, "eV/atom")), - ("µ", self._with_unit(self.total_magnetization, "µᵇ")), - ): - if value is not None: - payload[key] = value - - return payload - - -INIT_COLUMNS = { - "details": None, - "source": None, - "process": None, - "ΔE": "eV", - "Eᵈ": "eV/atom", - "Eˣ": "eV/atom", - "E": "eV/atom", - "Eᵛᵈʷ": "eV/atom", - "µ": "µᵇ", - 
"structures": None, -} From 7acd1a68489d3f3b02755239523395dbca3b484a Mon Sep 17 00:00:00 2001 From: esoteric-ephemera Date: Fri, 13 Feb 2026 10:40:55 -0800 Subject: [PATCH 4/4] clean up etl + docs --- mpcontribs-lux/mpcontribs/lux/pipelines.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/mpcontribs-lux/mpcontribs/lux/pipelines.py b/mpcontribs-lux/mpcontribs/lux/pipelines.py index f6cb58833..b1b88de5b 100644 --- a/mpcontribs-lux/mpcontribs/lux/pipelines.py +++ b/mpcontribs-lux/mpcontribs/lux/pipelines.py @@ -14,7 +14,19 @@ class LuxETL: - """Perform basic extract, transform, load operations for MPContribs uploads.""" + """Perform basic extract, transform, load operations for MPContribs uploads. + + To use a LuxETL: + 1. (Optional) If you want to upload data to MPContribs, you first must + create the project. Run a given LuxETL class's `init_columns_and_meta` + method: `LuxETL.init_columns_and_meta` which will set up the project. + 2. Initialize the ETL class and run `LuxETL().load()`, which will + execute the pipeline and return a list of records. + To submit these to MPContribs, run instead `LuxETL().load(submit=True)`. + + NB: if you define a schema using `ContributionRecord`, the conversion to + MPContribs-format data structures will be handled automatically. + """ project: str schema: ContributionRecord | None = None @@ -64,6 +76,9 @@ def transform(self, raw_records: Iterable[dict]) -> Iterable[dict[str, Any]]: return [self.schema(**raw).to_contribs_entry() for raw in raw_records] return raw_records - def run(self) -> None: + def load(self, submit: bool = False) -> Iterable[dict[str, Any]]: """Execute the default local pipeline.""" - self.transform(self.extract()) + records = self.transform(self.extract()) + if submit: + self.client.submit_contributions(records) + return records