Skip to content

Commit 9d3dc7f

Browse files
author
notactuallyfinn
committed
added test_case and generate strategies automatically
1 parent 7a8e8ae commit 9d3dc7f

File tree

2 files changed

+275
-76
lines changed

2 files changed

+275
-76
lines changed

src/hermes/commands/process/standard_merge.py

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,32 @@
55
# SPDX-FileContributor: Michael Fritzsche
66

77

8+
import csv
89
from typing import Any, Callable, Union
910

11+
import requests
12+
1013
from hermes.commands.base import HermesCommand
1114
from hermes.model.merge.action import Concat, MergeAction, MergeSet
1215
from hermes.model.types import ld_dict
1316
from hermes.model.types.ld_context import iri_map as iri
1417
from .base import HermesProcessPlugin
1518

1619

20+
def match_equals(left: Any, right: Any) -> bool:
    """
    Compares two objects with ==.

    Args:
        left (Any): The first object for the comparison.
        right (Any): The second object for the comparison.

    Returns:
        bool: The result of the comparison.
    """
    return left == right
32+
33+
1734
def match_keys(*keys: list[str], fall_back_to_equals: bool = False) -> Callable[[Any, Any], bool]:
1835
"""
1936
Creates a function taking two parameters that returns true
@@ -140,18 +157,18 @@ def match_func(left: Any, right: Any) -> bool:
140157
DEFAULT_MATCH = match_keys("@id", fall_back_to_equals=True)
141158
""" Callable[[Any, Any], bool]: The default match function used for comparison. """
142159

143-
MATCH_FUNCTION_FOR_TYPE = {"schema:Person": match_person}
160+
MATCH_FUNCTION_FOR_TYPE = {iri["schema:Person"]: match_person}
144161
"""
145162
dict[str, Callable[[Any, Any], bool]]: A dict containing for JSON_LD types the match function (not DEFAULT_MATCH).
146163
"""
147164

148165
ACTIONS = {
149166
"default": MergeSet(DEFAULT_MATCH),
150167
"concat": Concat(),
151-
"Person": MergeSet(MATCH_FUNCTION_FOR_TYPE["schema:Person"]),
168+
"Person": MergeSet(MATCH_FUNCTION_FOR_TYPE[iri["schema:Person"]]),
152169
**{
153170
"Or".join(types): MergeSet(match_multiple_types(
154-
*(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get("schema:" + type, DEFAULT_MATCH)) for type in types)
171+
*(("schema:" + type, MATCH_FUNCTION_FOR_TYPE.get(iri["schema:" + type], DEFAULT_MATCH)) for type in types)
155172
))
156173
for types in [
157174
("AboutPage", "CreativeWork"),
@@ -844,7 +861,78 @@ def match_func(left: Any, right: Any) -> bool:
844861

845862
class CodemetaProcessPlugin(HermesProcessPlugin):
    """
    Process plugin that provides the merge strategies used for CodeMeta / schema.org metadata.

    The strategies are generated from the vocabulary tables published by schema.org.
    If that generation fails for any reason, the static ``CODEMETA_STRATEGY`` table is used instead.
    """

    # switch to the schemaorg-current-https-*.csv files on change of standard context in HERMES
    _SCHEMA_TYPES_CSV_URL = "https://schema.org/version/latest/schemaorg-current-http-types.csv"
    _SCHEMA_PROPERTIES_CSV_URL = "https://schema.org/version/latest/schemaorg-current-http-properties.csv"

    def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]:
        """
        Builds the merge strategies for all known types and their properties.

        Args:
            command (HermesCommand): The command the plugin is called for (not used by this method).

        Returns:
            dict[Union[str, None], dict[Union[str, None], MergeAction]]: A mapping from type to a mapping
            from property to the merge action for that property. Entries of ``PROV_STRATEGY`` are added
            for every type it contains, but never override an entry that is already present.
        """
        try:
            strats = CodemetaProcessPlugin.get_schema_strategies()
            strats.update(CodemetaProcessPlugin.get_codemeta_strategies())
            strats[None] = {None: MergeSet(DEFAULT_MATCH)}
        except Exception:
            # Deliberate best effort: generating the strategies needs network access and a parsable
            # vocabulary file. Fall back to the static table, but leave a trace of the reason.
            import logging

            logging.getLogger(__name__).warning(
                "Could not generate the merge strategies from schema.org, using the static strategies.",
                exc_info=True,
            )
            strats = {**CODEMETA_STRATEGY}
        for key, value in PROV_STRATEGY.items():
            # already present strategies take precedence over the ones from PROV_STRATEGY
            strats[key] = {**value, **strats.get(key, {})}
        return strats

    @staticmethod
    def _fetch_csv_rows(url: str, timeout: float = 30.0) -> list[list[str]]:
        """
        Downloads a CSV file and returns its rows without the header line.

        Args:
            url (str): The location of the CSV file.
            timeout (float): Seconds to wait for the server before giving up.

        Returns:
            list[list[str]]: All rows of the file except the first one (the headers).

        Raises:
            requests.RequestException: If the download fails, times out or returns an HTTP error status.
        """
        # without a timeout requests may block forever on an unresponsive server
        response = requests.get(url, timeout=timeout)
        # do not try to parse an error page as CSV
        response.raise_for_status()
        decoded_content = response.content.decode('utf-8')
        reader = csv.reader(decoded_content.splitlines(), delimiter=',')
        # remove the first line (headers)
        return list(reader)[1:]

    @classmethod
    def get_schema_strategies(cls) -> dict[str, dict[str, MergeSet]]:
        """
        Generates the merge strategies from the schema.org type and property tables.

        A property gets its own merge action only if at least one of the types it can have values of
        (including all subtypes) has an entry in ``MATCH_FUNCTION_FOR_TYPE``. That action is registered
        for every type the property can occur in (including all subtypes).

        Returns:
            dict[str, dict[str, MergeSet]]: A mapping from type id to a mapping from property id to the
            merge action of that property.

        Raises:
            requests.RequestException: If one of the tables can not be downloaded.
        """
        # get a set of all types that have to be handled separately
        special_types = set(MATCH_FUNCTION_FOR_TYPE)

        # build the set of direct subtypes for every type
        # (column 0 is read as the type id, column 7 as a ", " separated list of its direct subtypes)
        subtypes_for_types = {}
        for type_row in cls._fetch_csv_rows(cls._SCHEMA_TYPES_CSV_URL):
            if len(type_row) <= 7:
                # empty or truncated line, nothing to read from it
                continue
            subtypes_for_types[type_row[0]] = set(type_row[7].split(", ")) if type_row[7] else set()
        # only immediate subtypes have been recorded now, add sub...subtypes too
        # (the outer loop picks the intermediate type, so one pass yields the full transitive closure)
        for super_type in subtypes_for_types:
            for other_type in subtypes_for_types:
                if super_type in subtypes_for_types[other_type]:
                    subtypes_for_types[other_type].update(subtypes_for_types[super_type])

        strategies = {}
        # add the strategies for all properties to all types they can occur in
        # (column 0 is read as the property id, column 6 as its domain types, column 7 as its range types)
        for property_row in cls._fetch_csv_rows(cls._SCHEMA_PROPERTIES_CSV_URL):
            if len(property_row) <= 7:
                # empty or truncated line, nothing to read from it
                continue
            # generate a set of all types this property can have values of
            shallow_range_types = set(property_row[7].split(", ")) if property_row[7] else set()
            range_types = shallow_range_types.union(
                *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types)
            )
            # get all special types this property can have values of
            special_range_types = special_types.intersection(range_types)
            if not special_range_types:
                # no special range type, so this property needs no special match function
                continue
            # construct the merge action, sorted so the order of the match functions is reproducible
            merge_action = MergeSet(match_multiple_types(
                *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in sorted(special_range_types)),
                fall_back_function=DEFAULT_MATCH
            ))
            # iterate over a set of all types this property can occur in
            shallow_domain_types = set(property_row[6].split(", ")) if property_row[6] else set()
            for domain_type in shallow_domain_types.union(
                *(subtypes_for_types.get(domain_type, set()) for domain_type in shallow_domain_types)
            ):
                # add the merge action to the strategies of the type
                strategies.setdefault(domain_type, {})[property_row[0]] = merge_action
        return strategies

    @classmethod
    def get_codemeta_strategies(cls) -> dict[str, dict[str, MergeAction]]:
        """
        Placeholder for the strategies of the CodeMeta specific terms.

        Returns:
            dict[str, dict[str, MergeAction]]: Currently always an empty dict.
        """
        # FIXME: implement
        return {}

0 commit comments

Comments
 (0)