Skip to content

Commit feeb16b

Browse files
author
notactuallyfinn
committed
worked on invenio deposit
1 parent 6c3ba13 commit feeb16b

4 files changed

Lines changed: 195 additions & 41 deletions

File tree

src/hermes/commands/deposit/base.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,25 @@ def __call__(self, command: HermesCommand) -> None:
3434
self.metadata = SoftwareMetadata.load_from_cache(self.ctx, "result")
3535
self.ctx.finalize_step("curate")
3636

37-
self.ctx.prepare_step("deposit")
38-
3937
self.prepare()
40-
self.map_metadata()
38+
deposit = self.map_metadata()
39+
self.ctx.prepare_step("deposit")
40+
with self.ctx[command.settings.target] as cache:
41+
cache["deposit"] = deposit.compact()
42+
self.ctx.finalize_step("deposit")
4143

4244
if self.is_initial_publication():
4345
self.create_initial_version()
4446
else:
4547
self.create_new_version()
4648

47-
self.update_metadata()
49+
deposit = self.update_metadata()
50+
self.ctx.prepare_step("deposit")
51+
with self.ctx[command.settings.target] as cache:
52+
cache["codemeta"] = deposit.compact()
53+
cache["expanded"] = deposit.ld_value
54+
cache["context"] = {"@context": deposit.full_context}
55+
self.ctx.finalize_step("deposit")
4856
self.delete_artifacts()
4957
self.upload_artifacts()
5058
self.publish()
@@ -59,8 +67,8 @@ def prepare(self) -> None:
5967
pass
6068

6169
@abc.abstractmethod
62-
def map_metadata(self) -> None:
63-
"""Map the given metadata to the target schema of the deposition platform.
70+
def map_metadata(self) -> SoftwareMetadata:
71+
"""Map the given metadata to the target schema of the deposition platform and return it.
6472
6573
When mapping metadata, make sure to add traces to the HERMES software, e.g. via
6674
DataCite's ``relatedIdentifier`` using the ``isCompiledBy`` relation. Ideally, the value
@@ -89,9 +97,9 @@ def create_new_version(self) -> None:
8997
"""Create a new version of an existing publication on the target platform."""
9098
pass
9199

92-
def update_metadata(self) -> None:
93-
"""Update the metadata of the newly created version."""
94-
pass
100+
def update_metadata(self) -> SoftwareMetadata:
101+
"""Update the metadata of the newly created version and return it even if it hasn't changed."""
102+
return self.metadata
95103

96104
def delete_artifacts(self) -> None:
97105
"""Delete any superfluous artifacts taken from the previous version of the publication."""
@@ -131,10 +139,11 @@ def __call__(self, args: argparse.Namespace) -> None:
131139

132140
try:
133141
plugin_func = self.plugins[plugin_name]()
134-
plugin_func(self)
135142
except KeyError as e:
136143
self.log.error("Plugin '%s' not found.", plugin_name)
137144
self.errors.append(e)
145+
try:
146+
plugin_func(self)
138147
except HermesValidationError as e:
139148
self.log.error("Error while executing %s: %s", plugin_name, e)
140149
self.errors.append(e)

src/hermes/commands/deposit/file.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pydantic import BaseModel
1212

1313
from hermes.commands.deposit.base import BaseDepositPlugin
14-
14+
from hermes.model import SoftwareMetadata
1515

1616
class FileDepositSettings(BaseModel):
1717
filename: str = 'codemeta.json'
@@ -20,6 +20,9 @@ class FileDepositSettings(BaseModel):
2020
class FileDepositPlugin(BaseDepositPlugin):
2121
settings_class = FileDepositSettings
2222

23+
def map_metadata(self) -> SoftwareMetadata:
24+
return self.metadata
25+
2326
def publish(self) -> None:
2427
file_config = self.command.settings.file
2528

src/hermes/commands/deposit/invenio.py

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,21 @@
66
# SPDX-FileContributor: Oliver Bertuch
77
# SPDX-FileContributor: Michael Meinel
88

9-
import json
109
import logging
1110
import pathlib
12-
import typing as t
1311
from datetime import date, datetime
1412
from pathlib import Path
1513
from urllib.parse import urlparse
1614

1715
import requests
1816
from pydantic import BaseModel
17+
from typing import Union
1918

2019
from hermes.commands.deposit.base import BaseDepositPlugin
2120
from hermes.commands.deposit.error import DepositionUnauthorizedError
2221
from hermes.error import MisconfigurationError
23-
from hermes.model.context_manager import HermesContext
22+
from hermes.model import SoftwareMetadata
23+
from hermes.model.error import HermesValidationError
2424
from hermes.utils import hermes_doi, hermes_user_agent
2525

2626

@@ -108,7 +108,7 @@ def __init__(self, client=None):
108108

109109
def resolve_latest_id(
110110
self, record_id=None, doi=None, codemeta_identifier=None
111-
) -> t.Tuple[t.Optional[str], dict]:
111+
) -> tuple[Union[str, None], dict]:
112112
"""
113113
Using the given metadata parameters, figure out the latest record id.
114114
@@ -166,7 +166,7 @@ def resolve_doi(self, doi) -> str:
166166
*_, record_id = page_url.path.split('/')
167167
return record_id
168168

169-
def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]:
169+
def resolve_record_id(self, record_id: str) -> tuple[str, dict]:
170170
"""
171171
Find the latest version of a given record.
172172
@@ -185,7 +185,7 @@ def resolve_record_id(self, record_id: str) -> t.Tuple[str, dict]:
185185
res_json = res.json()
186186
return res_json['id'], res_json['metadata']
187187

188-
def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]:
188+
def resolve_license_id(self, license_url: Union[str, None]) -> Union[str, None]:
189189
"""Get Invenio license representation from CodeMeta.
190190
191191
The license to use is extracted from the ``license`` field in the
@@ -218,7 +218,7 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]:
218218

219219
parsed_url = urlparse(license_url)
220220
url_path = parsed_url.path.rstrip("/")
221-
license_id = url_path.split("/")[-1]
221+
license_id = str.lower(url_path.split("/")[-1])
222222

223223
response = self.client.get_license(license_id)
224224
if response.status_code == 404:
@@ -230,7 +230,8 @@ def resolve_license_id(self, license_url: t.Optional[str]) -> t.Optional[str]:
230230

231231
@staticmethod
232232
def _extract_license_id_from_response(data: dict) -> str:
233-
return data["metadata"]["id"]
233+
# TODO: Determine the correct key; data["metadata"]["id"] did not work for me, but data["id"] does.
234+
return data["id"]
234235

235236

236237
class InvenioDepositSettings(BaseModel):
@@ -242,7 +243,7 @@ class InvenioDepositSettings(BaseModel):
242243
access_right: str = None
243244
embargo_date: str = None
244245
access_conditions: str = None
245-
api_paths: t.Dict = {}
246+
api_paths: dict = {}
246247
auth_token: str = ''
247248
files: list[pathlib.Path] = []
248249

@@ -335,15 +336,10 @@ def prepare(self) -> None:
335336

336337
self.invenio_ctx = deposition_data
337338

338-
def map_metadata(self) -> None:
339-
"""Map the harvested metadata onto the Invenio schema."""
340-
341-
deposition_metadata = self._codemeta_to_invenio_deposition()
342-
ctx = HermesContext()
343-
ctx.prepare_step("deposit")
344-
with ctx[self.platform_name] as deposit_ctx:
345-
deposit_ctx["deposit"] = deposition_metadata
346-
ctx.finalize_step("deposit")
339+
def map_metadata(self) -> SoftwareMetadata:
340+
"""Map the harvested metadata onto the Invenio schema and return it."""
341+
self.invenio_ctx["depositionMetadata"] = self._codemeta_to_invenio_deposition()
342+
return SoftwareMetadata(self.invenio_ctx["depositionMetadata"])
347343

348344
def is_initial_publication(self) -> bool:
349345
latest_record_id = self.invenio_ctx.get("latestRecord", {}).get("id")
@@ -402,8 +398,8 @@ def related_identifiers(self):
402398
},
403399
]
404400

405-
def update_metadata(self) -> None:
406-
"""Update the metadata of a draft."""
401+
def update_metadata(self) -> SoftwareMetadata:
402+
"""Update the metadata of a draft and return it."""
407403

408404
draft_url = self.links["latest_draft"]
409405

@@ -422,8 +418,7 @@ def update_metadata(self) -> None:
422418
self.links.update(deposit["links"])
423419

424420
_log.debug("Created new version deposit: %s", self.links["html"])
425-
with open(self.metadata.get_cache('deposit', 'deposit', create=True), 'w') as deposit_file:
426-
json.dump(deposit, deposit_file, indent=4)
421+
return SoftwareMetadata(deposit.get("metadata", {}))
427422

428423
def delete_artifacts(self) -> None:
429424
"""Delete existing file artifacts.
@@ -444,7 +439,10 @@ def upload_artifacts(self) -> None:
444439

445440
bucket_url = self.links["bucket"]
446441

447-
files = *self.config.files, *[f[0] for f in self.command.args.file]
442+
if self.command.args.file:
443+
files = *self.config.files, *[f[0] for f in self.command.args.file]
444+
else:
445+
files = tuple(*self.config.files)
448446
for path_arg in files:
449447
path = Path(path_arg)
450448

@@ -508,7 +506,22 @@ def _codemeta_to_invenio_deposition(self) -> dict:
508506
embargo_date = self.invenio_ctx["embargo_date"]
509507
access_conditions = self.invenio_ctx["access_conditions"]
510508

511-
creators = [
509+
creators = []
510+
for author in metadata["author"]:
511+
creator = {}
512+
if len(affils := [name for affil in author["affiliation"] for name in affil["legalname"]]) != 0:
513+
creator["affiliation"] = affils
514+
given_names_str = " ".join(author["givenName"])
515+
names = [f"{family_name}, {given_names_str}" for family_name in author["familyName"]]
516+
names.extend(author["names"])
517+
if len(names) != 0:
518+
creator["name"] = names
519+
if (id := author.get("@id", None)) is not None:
520+
creator["orcid"] = id.replace("https://orcid.org/", "")
521+
if creator:
522+
creators.append(creator)
523+
524+
"""creators = [
512525
# TODO: Distinguish between @type "Person" and others
513526
{
514527
k: v for k, v in {
@@ -523,7 +536,7 @@ def _codemeta_to_invenio_deposition(self) -> dict:
523536
}.items() if v is not None
524537
}
525538
for author in metadata["author"]
526-
]
539+
]"""
527540

528541
# This is not used at the moment. See comment below in `deposition_metadata` dict.
529542
contributors = [ # noqa: F841
@@ -546,6 +559,27 @@ def _codemeta_to_invenio_deposition(self) -> dict:
546559
for contributor in metadata.get("contributor", []) if contributor.get("name") != "GitHub"
547560
]
548561

562+
if len(metadata["name"]) != 1:
563+
_log.error("More than one or zero names for the Software are given.")
564+
raise HermesValidationError("More than one or zerno names for the Software.")
565+
name = metadata["name"][0]
566+
567+
if len(metadata["schema:description"]) > 1:
568+
_log.error("More than one descriptions of the Software are given.")
569+
raise HermesValidationError("More than one descriptions of the Software are given.")
570+
if len(metadata["schema:description"]) == 1:
571+
description = metadata["schema:description"][0]
572+
else:
573+
description = None
574+
575+
if len(metadata["schema:version"]) > 1:
576+
_log.error("More than one version of the Software are given.")
577+
raise HermesValidationError("More than one version of the Software are given.")
578+
if len(metadata["schema:version"]) == 1:
579+
version = metadata["schema:version"][0]
580+
else:
581+
version = None
582+
549583
# TODO: Use the fields currently set to `None`.
550584
# Some more fields are available but they most likely don't relate to software
551585
# publications targeted by hermes.
@@ -559,12 +593,12 @@ def _codemeta_to_invenio_deposition(self) -> dict:
559593
# TODO: Maybe we want a different date? Then make this configurable. If not,
560594
# this can be removed as it defaults to today.
561595
"publication_date": date.today().isoformat(),
562-
"title": metadata["name"],
596+
"title": name,
563597
"creators": creators,
564598
# TODO: Use a real description here. Possible sources could be
565599
# `tool.poetry.description` from pyproject.toml or `abstract` from
566600
# CITATION.cff. This should then be stored in codemeta description field.
567-
"description": metadata["name"],
601+
"description": description,
568602
"access_right": access_right,
569603
"license": license,
570604
"embargo_date": embargo_date,
@@ -590,17 +624,17 @@ def _codemeta_to_invenio_deposition(self) -> dict:
590624
"communities": communities,
591625
"grants": None,
592626
"subjects": None,
593-
"version": metadata.get('version'),
627+
"version": version,
594628
}.items() if v is not None}
595629

596630
return deposition_metadata
597631

598-
def _get_license_identifier(self) -> t.Optional[str]:
632+
def _get_license_identifier(self) -> Union[str, None]:
599633
"""Get Invenio license identifier that matches the given license URL.
600634
601635
If no license is configured, ``None`` will be returned.
602636
"""
603-
license_url = self.metadata["license"]
637+
license_url = self.metadata["license"][0]
604638
return self.resolver.resolve_license_id(license_url)
605639

606640
def _get_community_identifiers(self):

0 commit comments

Comments
 (0)