# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/pipelines.py.
# Relies on that module's imports: ``from __future__ import annotations``,
# ``MPCClient`` / ``MPContribsClientError`` (mpcontribs.client),
# ``ContributionRecord`` (mpcontribs.lux.schemas) and ``Iterable``
# (collections.abc, imported under TYPE_CHECKING).
class LuxETL:
    """Perform basic extract, transform, load operations for MPContribs uploads.

    Subclasses set ``project`` (and optionally ``schema``) and override
    ``extract`` / ``transform`` as needed.

    To use a LuxETL:
        1. (Optional) If you want to upload data to MPContribs, you first must
            create the project. Run the class method
            ``init_columns_and_meta``, which creates the project when needed
            and pushes the schema's columns and metadata to it.
        2. Initialize the ETL class and run ``load()``, which executes the
            pipeline and returns a list of records. To also submit them to
            MPContribs, run ``load(submit=True)``.

    NB: if you define a schema using `ContributionRecord`, the conversion to
    MPContribs-format data structures will be handled automatically.
    """

    # Name of the MPContribs project this pipeline targets; set by subclasses.
    project: str
    # Schema *class* (not an instance): it is called with ``**raw`` in
    # ``transform`` and its ``columns`` / ``metadata`` are read as class attributes.
    schema: type[ContributionRecord] | None = None

    def __init__(self, client: MPCClient | None = None, **kwargs) -> None:
        """Store the client used for uploads.

        Args:
            client: Existing client to reuse. When omitted, a new client bound
                to ``self.project`` is created.
            **kwargs: Accepted for forward compatibility; currently unused.
        """
        if client is None:
            client = MPCClient(project=self.project)
        self.client = client

    @classmethod
    def init_project(cls, **kwargs) -> MPCClient:
        """Return a client bound to ``cls.project``, creating the project if needed.

        Args:
            **kwargs: Forwarded to ``create_project`` when the project has to
                be created.

        Returns:
            A client constructed with ``project=cls.project``.
        """
        try:
            client = MPCClient(project=cls.project)
        except MPContribsClientError:
            # NOTE(review): the error is taken to mean "project does not exist
            # yet" -- confirm that no other client error can surface here.
            with MPCClient() as admin_client:
                # The original called ``mpr.contribs.create_project``, but no
                # ``mpr`` exists in this module; use the client opened above.
                admin_client.create_project(name=cls.project, **kwargs)
            client = MPCClient(project=cls.project)
        return client

    @classmethod
    def init_columns_and_meta(
        cls, unique_identifiers: bool | None = None, **kwargs
    ) -> None:
        """Create the project if necessary and push columns and metadata to it.

        Args:
            unique_identifiers: When not ``None``, the value is written to the
                project's ``unique_identifiers`` setting. ``None`` leaves the
                setting untouched.
            **kwargs: Forwarded to ``init_project``.

        Raises:
            ValueError: If the class defines no ``schema``.
        """
        if cls.schema is None:
            raise ValueError("No schema provided to initialize columns")

        client = cls.init_project(**kwargs)
        client.init_columns(cls.schema.columns)

        meta: dict[str, object] = {"other": cls.schema.metadata}
        if unique_identifiers is not None:
            # Forward the caller's choice; previously ``False`` was turned
            # into ``True`` here.
            meta["unique_identifiers"] = unique_identifiers
        client.update_project(meta)

    def extract(self) -> Iterable[dict]:
        """Load raw records from source files/APIs.

        The base implementation has no source and returns an empty list.
        """
        return []

    def transform(self, raw_records: Iterable[dict]) -> Iterable[dict]:
        """Normalize, clean, and validate raw records.

        With a ``schema`` set, every raw record is validated by instantiating
        the schema and converted with ``to_contribs_entry()``. Without one the
        input is returned unchanged.
        """
        if self.schema is None:
            return raw_records
        return [self.schema(**raw).to_contribs_entry() for raw in raw_records]

    def load(self, submit: bool = False) -> list[dict]:
        """Execute the pipeline and optionally submit the result.

        Args:
            submit: When true, the records are passed to
                ``self.client.submit_contributions``.

        Returns:
            The transformed records as a list.
        """
        # Materialize once so a generator-returning ``extract``/``transform``
        # override is not exhausted by the upload before being returned.
        records = list(self.transform(self.extract()))
        if submit:
            self.client.submit_contributions(records)
        return records
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/attachments/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/auph3/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/barin_tables/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/bioi_defects/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/broberg_benchmark_defects/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/cards/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/carrier_transport/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/defect_genome_pcfc_materials/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/delta_hvacancy/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/dilute_solute_diffusion/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/dtu/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/dtu/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/ediffcrystalprediction/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/esters/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/esters/schemas.py.
# Relies on that module's import: from pydantic import BaseModel, Field
class ContributionRecord(BaseModel, extra="forbid"):
    """Starter schema for a single contribution row.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: identifier of the contribution.
    identifier: str = Field(..., description="Contribution identifier")
    # Optional: reduced chemical formula; ``None`` when not supplied.
    formula: str | None = Field(None, description="Reduced chemical formula")


class AttachmentRecord(BaseModel, extra="forbid"):
    """Starter schema for the metadata of one attachment.

    Unknown keys are rejected (``extra="forbid"``) instead of being ignored.
    """

    # Required: contribution identifier (per the field description).
    identifier: str = Field(..., description="Contribution identifier")
    # Required: logical name of the attachment.
    name: str = Field(..., description="Attachment logical name")
    # Optional: MIME type of the attachment; ``None`` when not supplied.
    mime_type: str | None = Field(None, description="Attachment MIME type")
# Reconstructed from the collapsed patch hunk for
# mpcontribs-lux/mpcontribs/lux/projects/exp_xas/pipelines.py.
# Relies on that module's imports:
#     from collections.abc import Iterable
#     from .schemas import ContributionRecord
def extract() -> Iterable[dict]:
    """Return the raw records for this project.

    Scaffold placeholder: no source is wired up, so the result is always an
    empty list.
    """
    raw_records: list[dict] = []
    return raw_records


def transform(raw_records: Iterable[dict]) -> Iterable[dict]:
    """Clean and normalise raw records.

    Scaffold placeholder: the input iterable is handed back untouched (the
    very same object, not a copy).
    """
    return raw_records


def aggregate(records: Iterable[dict]) -> list[ContributionRecord]:
    """Build one ``ContributionRecord`` per input mapping.

    Each mapping is expanded into keyword arguments; errors raised by the
    schema constructor propagate to the caller.
    """
    typed_records: list[ContributionRecord] = []
    for fields in records:
        typed_records.append(ContributionRecord(**fields))
    return typed_records


def run() -> list[ContributionRecord]:
    """Run extract -> transform -> aggregate and return the typed records."""
    raw = extract()
    cleaned = transform(raw)
    return aggregate(cleaned)
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermo/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/experimental_thermoelectrics/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ferroelectrics/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/forbidden_transitions/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/gbdb/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/hfp2023/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/intermatch/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ion_ref_data/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/jarvis_dft_2023/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/matscholar/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/melting_points/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mg_cathode_screening_2022/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mno2_phase_selection/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/mofexplorer/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_update/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/ocp_upload/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/open_catalyst_project/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/perovskites_diffusion/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pycroscopy/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/pydatarecognition/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/qsgw_band_structures/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/screening_inorganic_pv/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/silicon_defects/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/simple_test/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/springer_materials/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py new file mode 100644 index 000000000..4a6ebcff1 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/__init__.py @@ -0,0 +1 @@ +"""Project module scaffold.""" diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py new file mode 100644 index 000000000..a96b1591c --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/pipelines.py @@ -0,0 +1,28 @@ +"""Data pipeline scaffold for this project. + +Implement extraction, transformation, aggregation, and optional upload helpers here. 
+""" + +from collections.abc import Iterable + +from .schemas import ContributionRecord + + +def extract() -> Iterable[dict]: + """Load raw records from source files/APIs.""" + return [] + + +def transform(raw_records: Iterable[dict]) -> Iterable[dict]: + """Normalize and clean raw records.""" + return raw_records + + +def aggregate(records: Iterable[dict]) -> list[ContributionRecord]: + """Validate and convert records into typed schema instances.""" + return [ContributionRecord(**record) for record in records] + + +def run() -> list[ContributionRecord]: + """Execute the default local pipeline.""" + return aggregate(transform(extract())) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py new file mode 100644 index 000000000..f7f497df5 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/swf/schemas.py @@ -0,0 +1,21 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from pydantic import BaseModel, Field + + +class ContributionRecord(BaseModel, extra="forbid"): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(default=None, description="Reduced chemical formula") + + +class AttachmentRecord(BaseModel, extra="forbid"): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py new file mode 100644 index 000000000..bcc2d0aa3 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/__init__.py @@ -0,0 +1,19 @@ +"""transparent_conductors project schemas and pipelines.""" + +from .pipelines import TransparentConductorsETL +from .schemas import ( + GOOGLE_SHEET_ID, + GOOGLE_SHEET_URL, + PROJECT_NAME, + SHEETS, + TransparentConductorRecord, +) + +__all__ = [ + "GOOGLE_SHEET_ID", + "GOOGLE_SHEET_URL", + "PROJECT_NAME", + "SHEETS", + "TransparentConductorsETL", + "TransparentConductorRecord", +] diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py new file mode 100644 index 000000000..f5bdeda82 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/pipelines.py @@ -0,0 +1,39 @@ +"""transparent_conductors ETL pipeline migrated from notebook logic.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas import read_excel + +from mpcontribs.lux.pipelines import LuxETL + +from .schemas import GOOGLE_SHEET_URL, PROJECT_NAME, SHEETS, TransparentConductorRecord + +if TYPE_CHECKING: + from 
collections.abc import Iterable + from typing import Any + from mpcontribs.lux.schemas import ContributionRecord + + +class TransparentConductorsETL(LuxETL): + """Extract and transform transparent conductor entries from the source workbook.""" + + project = PROJECT_NAME + schema = TransparentConductorRecord + + def extract(self) -> list[dict[str, Any]]: + """Read workbook sheets into raw row dicts, tagging each row with its sheet's doping class under "doping".""" + records: list[dict[str, Any]] = [] + for sheet_name in SHEETS: + doping = sheet_name.split(" ")[0] + df = read_excel(GOOGLE_SHEET_URL, sheet_name=sheet_name, header=[0, 1, 2]) + # The plain "doping" string key is removed again in transform before the row is parsed. + records += [{**row, "doping": doping} for row in df.to_dict(orient="records")] + return records + + def transform(self, raw_records: Iterable[dict]) -> Iterable[dict[str, Any]]: + """Validate raw rows with from_sheet_row and convert them to MPContribs entries, skipping rejected rows.""" + parsed = (self.schema.from_sheet_row({k: v for k, v in raw.items() if k != "doping"}, raw["doping"]) for raw in raw_records) + # from_sheet_row returns None for rows without a usable mpid; drop those instead of failing on None. + return [record.to_contribs_entry() for record in parsed if record is not None] diff --git a/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py new file mode 100644 index 000000000..a197e3c3e --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/transparent_conductors/schemas.py @@ -0,0 +1,283 @@ +"""Schemas for the transparent_conductors project ETL migration.""" + +from __future__ import annotations + +import math +from typing import Any + +from pydantic import Field + +from mpcontribs.lux.schemas import NON_DATA_FIELDS, ContributionRecord + +PROJECT_NAME = "transparent_conductors" +GOOGLE_SHEET_ID = "1bgQAdSfyrPEDI4iljwWlkyUPt_mo84jWr4N_1DKQDUI" +GOOGLE_SHEET_URL = ( + f"https://docs.google.com/spreadsheets/d/{GOOGLE_SHEET_ID}/export?format=xlsx" +) +SHEETS = ("n-type TCs", "p-type TCs") + +RAW_TO_FIELD = { + "doping": "doping", + "number of studies": "studies", + "quality.good or ok": "quality", + "structure and composition.common dopants": "dopants", + "structure and composition.space group symbol": "spacegroup", + "branch point energy.bpe min
ratio": "bpe_ratio_min", + "branch point energy.bpe max ratio": "bpe_ratio_max", + "branch point energy.bpe ratio": "bpe_ratio_mean", + "branch point energy.has degenerate bands": "bpe_degenerate", + "computed gap.hse06 band gap": "computed_gap_hse06_band", + "computed gap.hse06 direct gap": "computed_gap_hse06_direct", + "computed gap.pbe band gap": "computed_gap_pbe_band", + "computed gap.pbe direct gap": "computed_gap_pbe_direct", + "computed m*.conditions": "computed_mstar_conditions", + "computed m*.m* avg": "computed_mstar_average", + "computed m*.m* planar": "computed_mstar_planar", + "computed stability.e_above_hull": "computed_stability_e_hull", + "computed stability.e_above_pourbaix_hull": "computed_stability_e_pourbaix_hull", + "experimental doping type": "experimental_doping", + "experimental gap.max experimental gap": "experimental_gap_range_max", + "experimental gap.max gap reference": "experimental_gap_reference_max", + "experimental gap.min experimental gap": "experimental_gap_range_min", + "experimental gap.min gap reference": "experimental_gap_reference_min", + "max experimental conductivity.associated carrier concentration": "experimental_conductivity_concentration", + "max experimental conductivity.dopant": "experimental_conductivity_dopant", + "max experimental conductivity.max conductivity": "experimental_conductivity_max", + "max experimental conductivity.reference link": "experimental_conductivity_reference", + "max experimental conductivity.synthesis method": "experimental_conductivity_method", + "max experimental mobility.dopant": "experimental_mobility_dopant", + "max experimental mobility.max mobility": "experimental_mobility_max", + "max experimental mobility.reference link": "experimental_mobility_reference", + "max experimental mobility.synthesis method": "experimental_mobility_method", +} + +ALIASES = { + "studies": "studies", + "quality": "quality", + "dopants": "dopants", + "spacegroup": "spacegroup", + "bpe_ratio_min": 
"BPE.ratio.min", + "bpe_ratio_max": "BPE.ratio.max", + "bpe_ratio_mean": "BPE.ratio.mean", + "bpe_degenerate": "BPE.degenerate", + "computed_gap_hse06_band": "computed.gap.HSE06.band", + "computed_gap_hse06_direct": "computed.gap.HSE06.direct", + "computed_gap_pbe_band": "computed.gap.PBE.band", + "computed_gap_pbe_direct": "computed.gap.PBE.direct", + "computed_mstar_conditions": "computed.m*.conditions", + "computed_mstar_average": "computed.m*.average", + "computed_mstar_planar": "computed.m*.planar", + "computed_stability_e_hull": "computed.stability.Eₕ", + "computed_stability_e_pourbaix_hull": "computed.stability.Eₚₕ", + "experimental_doping": "experimental.doping", + "experimental_gap_range_max": "experimental.gap.range.max", + "experimental_gap_reference_max": "experimental.gap.references.max", + "experimental_gap_range_min": "experimental.gap.range.min", + "experimental_gap_reference_min": "experimental.gap.references.min", + "experimental_conductivity_concentration": "experimental.conductivity.concentration", + "experimental_conductivity_dopant": "experimental.conductivity.dopant", + "experimental_conductivity_max": "experimental.conductivity.max", + "experimental_conductivity_reference": "experimental.conductivity.reference", + "experimental_conductivity_method": "experimental.conductivity.method", + "experimental_mobility_dopant": "experimental.mobility.dopant", + "experimental_mobility_max": "experimental.mobility.max", + "experimental_mobility_reference": "experimental.mobility.reference", + "experimental_mobility_method": "experimental.mobility.method", +} + +UNITS = { + "studies": "", + "bpe_ratio_min": "", + "bpe_ratio_max": "", + "bpe_ratio_mean": "", + "computed_gap_hse06_band": "eV", + "computed_gap_hse06_direct": "eV", + "computed_gap_pbe_band": "eV", + "computed_gap_pbe_direct": "eV", + "computed_mstar_average": "", + "computed_mstar_planar": "", + "computed_stability_e_hull": "eV", + "computed_stability_e_pourbaix_hull": "eV", + 
"experimental_gap_range_max": "eV", + "experimental_gap_range_min": "eV", + "experimental_conductivity_concentration": "cm⁻³", + "experimental_conductivity_max": "S/cm", + "experimental_mobility_max": "cm²/V/s", +} + + +def _is_nan(value: Any) -> bool: + return isinstance(value, float) and math.isnan(value) + + +class TransparentConductorRecord(ContributionRecord): + """Typed entry for one transparent conductor contribution.""" + + aliases: dict[str, str] = ALIASES + units: dict[str, str] = UNITS + + doping: str = Field( + description="Doping class from source sheet (n-type or p-type)." + ) + studies: int | float | None = Field(None, description="Number of studies.") + quality: str | None = Field(None, description="Quality label (good or ok).") + dopants: str | None = Field(None, description="Common dopants.") + spacegroup: str | None = Field(None, description="Space group symbol.") + + bpe_ratio_min: float | int | str | None = Field( + None, description="Minimum BPE ratio." + ) + bpe_ratio_max: float | int | str | None = Field( + None, description="Maximum BPE ratio." + ) + bpe_ratio_mean: float | int | str | None = Field( + None, description="Mean BPE ratio." + ) + bpe_degenerate: str | bool | None = Field( + None, description="Whether degenerate bands exist." + ) + + computed_gap_hse06_band: float | int | str | None = Field( + None, description="Computed HSE06 band gap." + ) + computed_gap_hse06_direct: float | int | str | None = Field( + None, description="Computed HSE06 direct gap." + ) + computed_gap_pbe_band: float | int | str | None = Field( + None, description="Computed PBE band gap." + ) + computed_gap_pbe_direct: float | int | str | None = Field( + None, description="Computed PBE direct gap." + ) + computed_mstar_conditions: str | None = Field( + None, description="Effective mass conditions." + ) + computed_mstar_average: float | int | str | None = Field( + None, description="Average effective mass." 
+ ) + computed_mstar_planar: float | int | str | None = Field( + None, description="Planar effective mass." + ) + computed_stability_e_hull: float | int | str | None = Field( + None, description="Energy above hull." + ) + computed_stability_e_pourbaix_hull: float | int | str | None = Field( + None, description="Energy above Pourbaix hull." + ) + + experimental_doping: str | None = Field( + None, description="Experimental doping type." + ) + experimental_gap_range_max: float | int | str | None = Field( + None, description="Maximum experimental gap." + ) + experimental_gap_reference_max: str | None = Field( + None, description="Reference for max experimental gap." + ) + experimental_gap_range_min: float | int | str | None = Field( + None, description="Minimum experimental gap." + ) + experimental_gap_reference_min: str | None = Field( + None, description="Reference for min experimental gap." + ) + + experimental_conductivity_concentration: float | int | str | None = Field( + None, description="Carrier concentration at max conductivity." + ) + experimental_conductivity_dopant: str | None = Field( + None, description="Dopant for max conductivity." + ) + experimental_conductivity_max: float | int | str | None = Field( + None, description="Maximum experimental conductivity." + ) + experimental_conductivity_reference: str | None = Field( + None, description="Reference for conductivity data." + ) + experimental_conductivity_method: str | None = Field( + None, description="Synthesis method for conductivity." + ) + + experimental_mobility_dopant: str | None = Field( + None, description="Dopant for max mobility." + ) + experimental_mobility_max: float | int | str | None = Field( + None, description="Maximum experimental mobility." + ) + experimental_mobility_reference: str | None = Field( + None, description="Reference for mobility data." + ) + experimental_mobility_method: str | None = Field( + None, description="Synthesis method for mobility." 
+ ) + + @classmethod + def _clean_header_key(cls, keys: tuple[str, ...]) -> str: + key = ".".join( + [k.replace("TC", "").strip() for k in keys if not k.startswith("Unnamed:")] + ) + if key.endswith("experimental doping type"): + key = key.replace("Transport.", "") + key_split = key.split(".") + if len(key_split) > 2: + key = ".".join(key_split[1:]) + if key.endswith("google scholar"): + key = key.replace(".google scholar", "") + return key + + @staticmethod + def _normalize_unit(unit: str) -> str: + unit = unit.replace("^-3", "⁻³").replace("^20", "²⁰") + unit = unit.replace("V2/cms", "cm²/V/s").replace("cm^2/Vs", "cm²/V/s") + return unit + + @classmethod + def from_sheet_row( + cls, row: dict[tuple[str, ...], Any], doping: str + ) -> TransparentConductorRecord | None: + """Convert one spreadsheet row to a validated transparent conductor record.""" + identifier: str | None = None + extracted: dict[str, Any] = {"doping": doping} + + for keys, value in row.items(): + key = cls._clean_header_key(keys) + if key.endswith("MP link") or key.endswith("range"): + continue + + if key == "Material.mpid": + if identifier is None: + if _is_nan(value): + return None + identifier = str(value).strip() + continue + + if key == "Material.p pretty formula": + key = "formula" + + if isinstance(value, str): + normalized: Any = value.strip() + else: + if _is_nan(value): + continue + if key.endswith(")"): + key, unit = key.rsplit(" (", 1) + unit = cls._normalize_unit(unit[:-1]) + if "," in unit: + extra_key = key.rsplit(".", 1)[0].lower() + ".conditions" + extracted[extra_key] = unit + normalized = value + + if normalized in ("", None): + continue + + clean_key = key.replace(" for VB:CB = 4:2", "").replace("?", "").lower() + extracted[clean_key] = normalized + + if not identifier: + return None + + fields = {"identifier": identifier, "formula": extracted.get("formula")} + for raw_key, field_name in RAW_TO_FIELD.items(): + if raw_key in extracted: + fields[field_name] = extracted[raw_key] 
+ + return cls(**fields) diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py new file mode 100644 index 000000000..5b720b527 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/__init__.py @@ -0,0 +1,4 @@ +"""2dmatpedia project schemas and pipelines.""" + +from .pipelines import TwoDMatPediaETL +from .schemas import TwoDMatPediaRecord diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py new file mode 100644 index 000000000..f843a0c89 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/pipelines.py @@ -0,0 +1,39 @@ +"""2dmatpedia ETL pipeline migrated from notebook logic.""" + +from __future__ import annotations + +import gzip +import json +from collections.abc import Iterable +from pathlib import Path +from tempfile import NamedTemporaryFile, TemporaryDirectory +from typing import TYPE_CHECKING +from urllib.request import urlretrieve + +from monty.json import MontyDecoder + +from mpcontribs.lux.pipelines import LuxETL +from mpcontribs.lux.projects.twodmatpedia.schemas import TwoDMatPediaRecord + +if TYPE_CHECKING: + from typing import Any + + +class TwoDMatPediaETL(LuxETL): + """Extract, transform, and aggregate 2dmatpedia records.""" + + project: str = "2dmatpedia" + DB_JSON_URL: str = "http://www.2dmatpedia.org/static/db.json.gz" + + def extract(self) -> list[dict[str, Any]]: + """Download db.json.gz into a temporary directory and decode each line with MontyDecoder.""" + # A directory is used rather than NamedTemporaryFile: reopening an open named temp file by name is not portable. + decoder = MontyDecoder() + with TemporaryDirectory() as tmp_dir: + archive = Path(tmp_dir) / "db.json.gz" + urlretrieve(self.DB_JSON_URL, archive) + with gzip.open(archive, "rb") as handle: + raw_records: list[dict[str, Any]] = [ + decoder.decode(line) for line in handle + ] + return raw_records diff --git a/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py new file
mode 100644 index 000000000..860992554 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/projects/twodmatpedia/schemas.py @@ -0,0 +1,76 @@ +"""Schemas for the 2dmatpedia project ETL migration.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pydantic import Field, model_validator + +from emmet.core.types.pymatgen_types.structure_adapter import StructureType + +from mpcontribs.lux.schemas import ContributionRecord + +if TYPE_CHECKING: + from typing import Any + +PROJECT_DESCRIPTION = """ +We start from the around 80000 inorganic compounds in the Materials Project database. A geometry-based +algorithm [PRL] was used to identify layered structures among these compounds. Two-dimensional (2D) +materials were theoretically exfoliated by extracting one cluster in the standard conventional unit cell +of the layered structures screened in the above steps. A 20 Å vacuum along the c axis was imposed to +minimize the interactions of image slabs by periodic condition. Structure matcher tools from Pymatgen were +used to find duplicates of the exfoliated 2D materials. The standard workflow developed by the Materials +Project was used to perform high-throughput calculations for all the layered bulk and 2D materials screened +in this project. The calculations were performed by density functional theory as implemented in the Vienna +Ab Initio Simulation Package (VASP) software with Perdew-Burke-Ernzerhof (PBE) approximation for the +exchange-correlation functional and the frozen-core all-electron projector-augmented wave (PAW) method for +the electron-ion interaction. The cutoff energy for the plane wave expansion was set to 520 eV. +""".strip() + +PROJECT_METADATA = { + "title": "2DMatPedia", + "long_title": "2D Materials Encyclopedia", + "owner": "migueldiascosta@nus.edu.sg", + "authors": "M. Dias Costa, F.Y. Ping, Z. 
Jun", + "description": PROJECT_DESCRIPTION, + "references": [ + {"label": "WWW", "url": "http://www.2dmatpedia.org"}, + {"label": "PRL", "url": "https://doi.org/10.1103/PhysRevLett.118.106101"}, + ], +} + +SOURCE_PREFIXES: set[str] = {"mp", "mvc", "2dm"} + + +class TwoDMatPediaRecord(ContributionRecord): + """Validated 2dmatpedia source record.""" + + source_id: str = Field(description="Source material identifier.") + + material_id: str | None = Field(None, description="2dmatpedia material identifier.") + discovery_process: str | None = Field( + None, description="Discovery process (top-down or bottom-up)." + ) + bandgap: float | None = Field(None, description="Band gap in eV.") + decomposition_energy: float | None = Field( + None, description="Decomposition energy in eV/atom." + ) + exfoliation_energy_per_atom: float | None = Field( + None, description="Exfoliation energy in eV/atom." + ) + energy_per_atom: float | None = Field(None, description="Energy in eV/atom.") + energy_vdw_per_atom: float | None = Field( + None, description="Van-der-Waals energy in eV/atom." + ) + total_magnetization: float | None = Field( + None, description="Total magnetization in Bohr magnetons." + ) + + units: dict[str, str] = { + "bandgap": "eV", + "decomposition_energy": "eV/atom", + "exfoliation_energy_per_atom": "eV/atom", + "energy_per_atom": "eV/atom", + "energy_vdw_per_atom": "eV/atom", + "total_magnetization": "mu_B", + } diff --git a/mpcontribs-lux/mpcontribs/lux/schemas.py b/mpcontribs-lux/mpcontribs/lux/schemas.py new file mode 100644 index 000000000..a7b3daa06 --- /dev/null +++ b/mpcontribs-lux/mpcontribs/lux/schemas.py @@ -0,0 +1,88 @@ +"""Pydantic schemas for this project. + +Replace these starter models with project-specific, fully documented schemas. 
+""" + +from __future__ import annotations + +from pydantic import BaseModel, Field, model_serializer +from typing import TYPE_CHECKING, get_args + +from emmet.core.types.pymatgen_types.structure_adapter import StructureType + +if TYPE_CHECKING: + from typing_extensions import Self + from typing import Any + +NON_DATA_FIELDS = { + "identifier", + "formula", + "units", + "aliases", + "structures", + "attachments", +} + + +class AttachmentRecord(BaseModel): + """Attachment metadata schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + name: str = Field(description="Attachment logical name") + mime_type: str | None = Field(default=None, description="Attachment MIME type") + + +class ContributionRecord(BaseModel): + """Core contribution row schema (starter).""" + + identifier: str = Field(description="Contribution identifier") + formula: str | None = Field(None, description="Reduced chemical formula") + + units: dict[str, str] = Field( + {}, description="mapping of column names to units.", exclude=True + ) + structures: list[StructureType] = Field( + [], description="Structures associated with this entry." 
+ ) + attachments: list[AttachmentRecord] = Field([]) + aliases: dict[str, str] = Field( + {}, + description="Aliases of fields to use when generating column names.", + exclude=True, + ) + + def to_contribs_entry(self) -> dict[str, Any]: + """Format this entry as an MPContribs compatible entry, omitting data fields whose value is None.""" + # Collect data fields in declaration order so the output key order is deterministic. + values = {k: getattr(self, k, None) for k in self.__class__.model_fields if k not in NON_DATA_FIELDS} + return { + "identifier": self.identifier, + "formula": self.formula, + "data": { + self.aliases.get(k, k): f"{v} {self.units.get(k, '')}".strip() + for k, v in values.items() + # Unset optional fields would otherwise be emitted as the literal string "None". + if v is not None + }, + "structures": self.structures, + "attachments": self.attachments, + } + + @property + def columns(self) -> dict[str, str]: + """Map each data column (alias if defined) to its unit: the declared unit, else "" for int/float fields, else None.""" + return { + self.aliases.get(k, k): self.units.get(k) + or ("" if any(t in get_args(field.annotation) for t in (int, float)) else None) + for k, field in self.__class__.model_fields.items() + if k not in NON_DATA_FIELDS + } + + @property + def metadata(self) -> dict[str, str]: + """Map each described data column (alias if defined) to its field description.""" + return { + self.aliases.get(k, k): field.description + for k, field in self.__class__.model_fields.items() + if k not in NON_DATA_FIELDS and field.description + }