@@ -862,8 +862,9 @@ def match_func(left: Any, right: Any) -> bool:
862862class CodemetaProcessPlugin (HermesProcessPlugin ):
863863 def __call__ (self , command : HermesCommand ) -> dict [Union [str , None ], dict [Union [str , None ], MergeAction ]]:
864864 try :
865- strats = CodemetaProcessPlugin .get_schema_strategies ()
866- strats .update (CodemetaProcessPlugin .get_codemeta_strategies ())
865+ subtypes_for_types = CodemetaProcessPlugin .get_schema_type_hierarchy ()
866+ strats = CodemetaProcessPlugin .get_schema_strategies (subtypes_for_types )
867+ strats .update (CodemetaProcessPlugin .get_codemeta_strategies (subtypes_for_types ))
867868 strats [None ] = {None : MergeSet (DEFAULT_MATCH )}
868869 except Exception :
869870 strats = {** CODEMETA_STRATEGY }
@@ -872,10 +873,7 @@ def __call__(self, command: HermesCommand) -> dict[Union[str, None], dict[Union[
872873 return strats
873874
874875 @classmethod
875- def get_schema_strategies (cls ):
876- # get a set of all types that have to be handled separately
877- special_types = set (MATCH_FUNCTION_FOR_TYPE .keys ())
878-
876+ def get_schema_type_hierarchy (cls ):
879877 # get and read csv file containing information on schema.org types
880878 # switch to schemaorg-current-https-types.csv on change of standard context in HERMES
881879 download = requests .get ("https://schema.org/version/latest/schemaorg-current-http-types.csv" )
@@ -897,6 +895,12 @@ def get_schema_strategies(cls):
897895 for other_type in subtypes_for_types :
898896 if super_type in subtypes_for_types [other_type ]:
899897 subtypes_for_types [other_type ].update (subtypes_for_types [super_type ])
898+ return subtypes_for_types
899+
900+ @classmethod
901+ def get_schema_strategies (cls , subtypes_for_types ):
902+ # get a set of all types that have to be handled separately
903+ special_types = set (MATCH_FUNCTION_FOR_TYPE .keys ())
900904
901905 # get and read csv file containing information on schema.org properties
902906 # switch to schemaorg-current-https-properties.csv on change of standard context in HERMES
@@ -933,6 +937,39 @@ def get_schema_strategies(cls):
933937 return strategies
934938
@classmethod
def get_codemeta_strategies(cls, subtypes_for_types):
    """Build merge strategies for the properties listed in the CodeMeta crosswalk.

    The CodeMeta 2.0 crosswalk table is downloaded and read row by row. For every
    property whose range (third column, alternatives separated by " or ") contains a
    type with an entry in MATCH_FUNCTION_FOR_TYPE, either directly or through one of
    its subtypes, a MergeSet strategy is registered for the property's domain type
    (first column) and for all subtypes of that domain type.

    :param subtypes_for_types: Mapping from a type to the set of all its subtypes,
        as passed in by ``__call__`` from ``get_schema_type_hierarchy()``.
        Types missing from the mapping are treated as having no subtypes.
    :return: Nested dict ``{domain type: {property: MergeSet}}``. Keys are whatever
        ``iri[...]`` yields (presumably expanded IRIs - confirm against ``iri``).
    :raises requests.RequestException: If the download fails, times out or returns
        an HTTP error status.
    """
    # get a set of all types that have to be handled separately
    special_types = set(MATCH_FUNCTION_FOR_TYPE.keys())

    # The raw file has to be requested: the "github.com/<repo>/blob/..." address
    # serves the HTML viewer page, not the CSV content.
    # FIXME: change URL on change of context to codemeta 3.0
    download = requests.get(
        "https://raw.githubusercontent.com/codemeta/codemeta/2.0/crosswalk.csv",
        timeout=30,
    )
    # fail loudly instead of parsing an error page as if it were the crosswalk
    download.raise_for_status()
    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    # remove the first line (headers)
    next(cr, None)

    strategies = {}
    for property_row in cr:
        # skip blank lines (csv.reader yields [] for them), truncated rows,
        # rows without a parent type and rows whose parent type is just "schema"
        if len(property_row) < 3 or not property_row[0] or property_row[0] == "schema":
            continue

        # generate a set of all types this property can have values of
        shallow_range_types = {
            iri["schema:" + range_type] for range_type in property_row[2].split(" or ")
        }
        range_types = shallow_range_types.union(
            *(subtypes_for_types.get(range_type, set()) for range_type in shallow_range_types)
        )

        # only properties with at least one special range type need a special match function
        special_range_types = special_types.intersection(range_types)
        if not special_range_types:
            continue

        # construct the match function
        match_function = MergeSet(match_multiple_types(
            *((range_type, MATCH_FUNCTION_FOR_TYPE[range_type]) for range_type in special_range_types),
            fall_back_function=DEFAULT_MATCH
        ))

        # The hierarchy has to be looked up with the type itself; a set
        # wrapping the type is unhashable and cannot be used as a dict key.
        shallow_domain_type = iri[property_row[0]]
        property_iri = iri[property_row[1]]
        domain_types = {shallow_domain_type}.union(
            subtypes_for_types.get(shallow_domain_type, set())
        )
        # add the match function to every type this property can occur in
        for domain_type in domain_types:
            strategies.setdefault(domain_type, {})[property_iri] = match_function

    # return the strategies
    return strategies
0 commit comments