|
63 | 63 | from dicee.evaluator import evaluate_lp |
64 | 64 | from abc import ABC, abstractmethod |
65 | 65 | import torch |
| 66 | +import re |
66 | 67 |
|
67 | 68 | class KnowledgeGraphPredictor: |
68 | 69 | """ |
@@ -407,11 +408,96 @@ def __call__(self,indexed_triples:torch.LongTensor): |
407 | 408 | scores.append([0.0]) |
408 | 409 | return torch.FloatTensor(scores) |
409 | 410 |
|
| 411 | + |
class RALP(AbstractBaseLinkPredictorClass):
    """Retrieval-Augmented Link Prediction (RALP).

    Scores a triple (h, r, t) by prompting an OpenAI-compatible LLM with the
    candidate triple together with all training triples in which the head
    entity occurs, and parsing a plausibility score in [0.0, 1.0] from the
    model's reply.
    """

    def __init__(self, knowledge_graph: KG = None,
                 name: str = "ralp-1.0",
                 base_url: str = "http://tentris-ml.cs.upb.de:8501/v1",
                 api_key: str = None,
                 model: str = "tentris") -> None:
        """Initialize the predictor.

        :param knowledge_graph: KG providing train_set and index/label mappings.
        :param name: Human-readable model name.
        :param base_url: OpenAI-compatible chat-completions endpoint.
        :param api_key: API key for the endpoint.
        :param model: Model identifier sent with each request.
        """
        super().__init__(knowledge_graph, name)
        self.client = OpenAI(base_url=base_url, api_key=api_key)
        self.model = model

    def extract_float(self, text: str) -> float:
        """Extract the first number from *text* as a float.

        Used to parse the LLM output for the scoring task. Returns 0.0 when no
        number is present. Accepts plain integers as well as decimals: the
        previous pattern required a decimal point, so an answer of "1" was
        silently mapped to 0.0.
        """
        match = re.search(r"-?(?:\d+\.\d*|\.\d+|\d+)", text)
        return float(match.group()) if match else 0.0

    def ru(self, entity: str) -> str:
        """Remove underscores from an entity/relation label (as str) so it reads naturally in prompts."""
        return entity.replace("_", " ")

    def get_score(self, triple: tuple, triples_h: str) -> float:
        """Ask the LLM for a plausibility score of *triple* given the formatted context *triples_h*.

        :param triple: (subject, predicate, object) label strings.
        :param triples_h: Pre-formatted, newline-separated training triples for the subject.
        :return: Parsed float score (0.0 if the reply contains no number).
        """
        system_prompt = """You are an expert in knowledge graphs and link prediction. Your task is to assign a plausibility score (from 0 to 1) to a given triple (subject, predicate, object) based on a set of known training triples for the same subject.

        - A score of 1.0 means the triple is highly likely to be true.
        - A score of 0.0 means the triple is highly unlikely to be true.
        - Intermediate values (e.g., 0.4, 0.7) reflect varying levels of plausibility.

        **Guidelines for scoring:**
        1. **Exact Match:** If the triple already exists in the training set or if the facts clearly state that the triple must be true assign a score close to 1.0.
        2. **Pattern Matching:** If the predicate-object pair frequently occurs for the given subject, assign a high score.
        3. **Semantic Similarity:** If the object is semantically close to known objects for the subject-predicate pair, assign a moderate to high score.
        4. **Rare or Unseen Combinations:** If the triple does not follow the learned patterns, assign a low score.
        5. **Contradictions:** If the triple contradicts existing facts (perform your own reasoning), assign a very low score.

        You must analyze the given triple and the training triples, apply the reasoning above, and output only a single **floating-point score** between **0.0 and 1.0**, without any explanation or additional text.
        Do not depend only on triples provided to you, also use your own knowledge as an AI assistant to reason about the truthness of the given triple as a fact.
        You are strictly required to provide only the score as an answer and do not explain it."""

        user_prompt = f"""Here is the triple we want to evaluate:
        (subject: {triple[0]}, predicate: {triple[1]}, object: {triple[2]})

        Here are the known training triples for the subject "{triple[0]}":
        {triples_h}

        Assign a score to the given triple based on the provided training triples.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        # Extract the response content and parse the numeric score.
        content = response.choices[0].message.content
        return self.extract_float(content)

    def __call__(self, indexed_triples: torch.LongTensor) -> torch.FloatTensor:
        """Score a batch of index triples; returns an (n, 1) FloatTensor of scores."""
        n, d = indexed_triples.shape
        # For the time being only single-triple batches are supported.
        assert d == 3
        assert n == 1
        scores = []
        for idx_h, idx_r, idx_t in indexed_triples.tolist():
            h = self.idx_to_entity[idx_h]
            r = self.idx_to_relation[idx_r]
            t = self.idx_to_entity[idx_t]

            # Retrieve training triples where the head entity appears as subject or object.
            triples_h = [trp for trp in self.kg.train_set if trp[0] == idx_h or trp[2] == idx_h]

            # Format the triples into the structured bullet list expected by the prompt.
            triples_h_str = "".join(
                f'- ("{self.ru(self.idx_to_entity[s])}", "{self.ru(self.idx_to_relation[p])}", "{self.ru(self.idx_to_entity[o])}") \n'
                for s, p, o in triples_h
            )

            # BUG FIX: previously the raw index-tuple list `triples_h` was passed instead
            # of the formatted string, so the LLM saw integer indices and the formatted
            # context was dead code. Also de-underscore the query triple so it matches
            # the formatting of the context triples.
            score = self.get_score((self.ru(h), self.ru(r), self.ru(t)), triples_h_str)
            scores.append([score])
        return torch.FloatTensor(scores)
| 493 | + |
| 494 | + |
if __name__ == "__main__":
    import os

    # () Read / Preprocess KG. Raw string avoids the invalid "\s" escape warning.
    kg = KG(dataset_dir="KGs/Countries-S1", separator=r"\s+", eval_model="train_val_test")

    # Read the API key from the environment rather than hard-coding a secret;
    # falls back to the previous placeholder so behavior is unchanged when unset.
    api_key = os.environ.get("RALP_API_KEY", "API_KEY")

    # It takes ~14 h to evaluate this model :/
    evaluate_lp(model=RALP(knowledge_graph=kg, api_key=api_key),
                triple_idx=kg.train_set, num_entities=len(kg.entity_to_idx), er_vocab=kg.er_vocab,
                re_vocab=kg.re_vocab, info='Eval LP Starts', batch_size=1, chunk_size=1)
416 | 502 |
|
417 | 503 | # @TODO: Create classes inherits from AbstractBaseLinkPredictorClass and improve the link prediction results |
|
0 commit comments