Skip to content

Commit c699ae2

Browse files
author
notactuallyfinn
committed
finished implementation of CodemetaProcessPlugin
1 parent 9d3dc7f commit c699ae2

File tree

1 file changed

+46
-9
lines changed

1 file changed

+46
-9
lines changed

src/hermes/commands/process/standard_merge.py

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -862,8 +862,9 @@ def match_func(left: Any, right: Any) -> bool:
862862
class CodemetaProcessPlugin(HermesProcessPlugin):
863863
def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[str, None], MergeAction]]:
864864
try:
865-
strats = CodemetaProcessPlugin.get_schema_strategies()
866-
strats.update(CodemetaProcessPlugin.get_codemeta_strategies())
865+
subtypes_for_types = CodemetaProcessPlugin.get_schema_type_hierarchy()
866+
strats = CodemetaProcessPlugin.get_schema_strategies(subtypes_for_types)
867+
strats.update(CodemetaProcessPlugin.get_codemeta_strategies(subtypes_for_types))
867868
strats[None] = {None: MergeSet(DEFAULT_MATCH)}
868869
except Exception:
869870
strats = {**CODEMETA_STRATEGY}
@@ -872,10 +873,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[
872873
return strats
873874

874875
@classmethod
875-
def get_schema_strategies(cls):
876-
# get a set of all types that have to be handled separately
877-
special_types = set(MATCH_FUNCTION_FOR_TYPE.keys())
878-
876+
def get_schema_type_hierarchy(cls):
879877
# get and read csv file containing information on schema.org types
880878
# switch to schemaorg-current-https-types.csv on change of standard context in HERMES
881879
download = requests.get("https://schema.org/version/latest/schemaorg-current-http-types.csv")
@@ -897,6 +895,12 @@ def get_schema_strategies(cls):
897895
for other_type in subtypes_for_types:
898896
if super_type in subtypes_for_types[other_type]:
899897
subtypes_for_types[other_type].update(subtypes_for_types[super_type])
898+
return subtypes_for_types
899+
900+
@classmethod
901+
def get_schema_strategies(cls, subtypes_for_types):
902+
# get a set of all types that have to be handled separately
903+
special_types = set(MATCH_FUNCTION_FOR_TYPE.keys())
900904

901905
# get and read csv file containing information on schema.org properties
902906
# switch to schemaorg-current-https-properties.csv on change of standard context in HERMES
@@ -933,6 +937,39 @@ def get_schema_strategies(cls):
933937
return strategies
934938

935939
@classmethod
936-
def get_codemeta_strategies(cls):
937-
# FIXME: implement
938-
return {}
940+
def get_codemeta_strategies(cls, subtypes_for_types):
    """Build merge strategies from the CodeMeta 2.0 crosswalk table.

    The crosswalk CSV is downloaded and read row by row. For every property
    whose range (including subtypes) contains a type listed in
    MATCH_FUNCTION_FOR_TYPE, a MergeSet with a type-aware match function is
    registered for the property's domain type and all of its subtypes.

    :param subtypes_for_types: mapping from a type to the set of its subtypes
        (as produced by get_schema_type_hierarchy); types missing from the
        mapping are treated as having no subtypes.
    :return: dict mapping a domain type to a dict mapping a property to its
        merge strategy.
    :raises requests.HTTPError: if the crosswalk could not be downloaded.
    """
    # get a set of all types that have to be handled separately
    special_types = set(MATCH_FUNCTION_FOR_TYPE.keys())

    # FIXME: change URL on change of context to codemeta 3.0
    # Use the raw-content URL: the "github.com/.../blob/..." address serves an
    # HTML page, not the CSV file itself.
    download = requests.get("https://raw.githubusercontent.com/codemeta/codemeta/2.0/crosswalk.csv")
    # fail loudly on HTTP errors instead of parsing an error page as CSV
    download.raise_for_status()
    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    # skip the first line (headers)
    next(cr, None)

    strategies = {}
    for property_row in cr:
        # skip blank/short rows (csv yields [] for empty lines) and rows without a usable parent type
        if len(property_row) < 3 or property_row[0] == "schema" or len(property_row[0]) == 0:
            continue
        # generate a set of all types this property can have values of
        shallow_range_types = {iri["schema:" + range_type] for range_type in property_row[2].split(" or ")}
        range_types = shallow_range_types.union(
            *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types)
        )
        # get all special types this property can have values of
        special_range_types = special_types.intersection(range_types)
        # only properties with a special range type need a special match function
        if not special_range_types:
            continue
        # construct the match function
        match_function = MergeSet(match_multiple_types(
            *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types),
            fall_back_function=DEFAULT_MATCH
        ))
        # iterate over a set of all types this property can occur in;
        # look up the subtypes by the type itself (a set is unhashable and cannot be a dict key)
        shallow_domain_type = iri[property_row[0]]
        domain_types = {shallow_domain_type}.union(subtypes_for_types.get(shallow_domain_type, set()))
        for domain_type in domain_types:
            # add the match function to the types match functions
            strategies.setdefault(domain_type, {})[iri[property_row[1]]] = match_function
    # return the strategies
    return strategies

0 commit comments

Comments
 (0)