Merge pull request #139 from lab-v2/update_parsing_logic

ColtonPayne · web-flow · commit c7ff7824519d · 2026-04-22T17:38:25.000-04:00
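Relax parsing requirements to allow hyphens and periods in predicate and component names (components may also contain '@' and start with a digit); fact components are now whitelist-validated, so quoted names such as "A" are rejected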
diff --git a/pyreason/scripts/utils/fact_parser.py b/pyreason/scripts/utils/fact_parser.py
@@ -1,6 +1,28 @@
 import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
 import re
 
+_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*\Z')  # predicate names; \Z (not $) so a trailing newline is rejected
+_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*\Z')  # component names (may start with a digit, may contain '@'); \Z (not $) so a trailing newline is rejected
+
+
+def _validate_predicate(name):
+    """Validate a predicate name: non-empty, starts with a letter/underscore, then only letters, digits, '_', '.', or '-'; raise ValueError otherwise."""
+    if not name:
+        raise ValueError("Predicate name cannot be empty")
+    if not _PREDICATE_RE.match(name):
+        if name[0].isdigit():  # report the leading-digit case with its own, more specific message
+            raise ValueError(f"Predicate name '{name}' cannot start with a digit. Must start with a letter or underscore")
+        else:
+            raise ValueError(f"Predicate name '{name}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")
+
+
+def _validate_component(name, context):
+    """Validate a component (entity) name: non-empty, starts with a letter, digit, or underscore, then only letters, digits, '_', '.', '@', or '-'; raise ValueError (message prefixed with `context`) otherwise."""
+    if not name:
+        raise ValueError(f"{context} name cannot be empty")
+    if not _COMPONENT_RE.match(name):
+        raise ValueError(f"{context} name '{name}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")
+
 
 # Input validation work was implemented with the help of Claude Sonnet 4.5.
 def parse_fact(fact_text):
@@ -75,28 +97,13 @@ def parse_fact(fact_text):
     pred = pred_comp[:idx]
     component = pred_comp[idx + 1:-1]
 
-    # Validate predicate is not empty
-    if not pred:
-        raise ValueError("Predicate cannot be empty")
-
-    # Validate predicate contains only valid characters (alphanumeric and underscore)
-    # Predicates must start with a letter or underscore (like Python identifiers)
-    if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', pred):
-        if pred[0].isdigit():
-            raise ValueError(f"Predicate '{pred}' cannot start with a digit. Must start with a letter or underscore")
-        else:
-            raise ValueError(f"Predicate '{pred}' contains invalid characters. Only letters, digits, and underscores allowed, must start with letter or underscore")
+    # Validate predicate name
+    _validate_predicate(pred)
 
     # Validate component is not empty
     if not component:
         raise ValueError("Component cannot be empty")
 
-    # Check for invalid characters in component
-    if '(' in component or ')' in component:
-        raise ValueError("Component cannot contain parentheses")
-    if ':' in component:
-        raise ValueError("Component cannot contain colons")
-
     # Check if it is a node or edge fact
     if ',' in component:
         fact_type = 'edge'
@@ -106,14 +113,14 @@ def parse_fact(fact_text):
         if len(components) != 2:
             raise ValueError(f"Edge facts must have exactly 2 components, found {len(components)}")
 
-        # Validate no empty components
+        # Validate component names
         for i, comp in enumerate(components):
-            if not comp:
-                raise ValueError(f"Component {i+1} in edge fact cannot be empty")
+            _validate_component(comp, f"Edge component {i+1}")
 
         component = tuple(components)
     else:
         fact_type = 'node'
+        _validate_component(component, "Node component")
 
     # Check if bound is a boolean or a list of floats
     if bound.lower() == 'true':
diff --git a/pyreason/scripts/utils/rule_parser.py b/pyreason/scripts/utils/rule_parser.py
@@ -10,7 +10,8 @@
 import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
 from pyreason.scripts.threshold.threshold import Threshold
 
-_IDENTIFIER_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
+_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*\Z')  # predicate names; \Z (not $) so a trailing newline is rejected
+_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*\Z')  # component names (may start with a digit, may contain '@'); \Z (not $) so a trailing newline is rejected
 
 
 def parse_rule(rule_text: str, name: str, custom_thresholds: Union[None, list, dict], infer_edges: bool = False, set_static: bool = False, weights: Union[None, np.ndarray] = None) -> rule.Rule:
@@ -482,19 +483,17 @@ def _parse_head_arguments(head_args_str):
 
 
 def _validate_predicate_name(pred, context):
-    """Validate that a predicate name matches ^[a-zA-Z_][a-zA-Z0-9_]*$."""
-    if not _IDENTIFIER_RE.match(pred):
+    """Validate that a predicate name matches ^[a-zA-Z_][a-zA-Z0-9_.\\-]*$; raise ValueError (message prefixed with `context`) otherwise, with a dedicated message for a leading digit."""
+    if not _PREDICATE_RE.match(pred):
         if pred and pred[0].isdigit():
             raise ValueError(f"{context} predicate name '{pred}' cannot start with a digit")
-        raise ValueError(f"{context} predicate name '{pred}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_]*")
+        raise ValueError(f"{context} predicate name '{pred}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")
 
 
 def _validate_component_name(var, context):
-    """Validate that a variable name matches ^[a-zA-Z_][a-zA-Z0-9_]*$."""
-    if not _IDENTIFIER_RE.match(var):
-        if var and var[0].isdigit():
-            raise ValueError(f"{context} component name '{var}' cannot start with a digit")
-        raise ValueError(f"{context} component name '{var}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_]*")
+    """Validate that a component name matches ^[a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*$ (a leading digit is allowed); raise ValueError (message prefixed with `context`) otherwise."""
+    if not _COMPONENT_RE.match(var):
+        raise ValueError(f"{context} component name '{var}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")
 
 
 def _str_bound_to_bound(str_bound):
diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 
 setup(
     name='pyreason',
-    version='3.5.0',
+    version='3.5.1',
     author='Dyuman Aditya',
     author_email='dyuman.aditya@gmail.com',
     description='An explainable inference software supporting annotated, real valued, graph based and temporal logic',
diff --git a/tests/api_tests/test_pyreason_reasoning.py b/tests/api_tests/test_pyreason_reasoning.py
@@ -149,7 +149,7 @@ def test_reason_with_existing_specific_node_labels(self):
         pr.add_rule(Rule("friend(A, B) <- person(A), person(B)", "test_rule", False))
 
         # Add some facts first to create existing specific labels
-        pr.add_fact(Fact('person("C")', 'person_c', 0, 1))
+        pr.add_fact(Fact('person(C)', 'person_c', 0, 1))
 
         interpretation = pr.reason(timesteps=1)
         assert interpretation is not None
@@ -292,7 +292,7 @@ def test_reason_clears_facts_after_reasoning(self):
         pr.add_rule(Rule("friend(A, B) <- connected(A, B)", "test_rule", False))
 
         # Add some facts
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         interpretation = pr.reason(timesteps=1)
         assert interpretation is not None
@@ -352,7 +352,7 @@ def test_multiple_reason_calls(self):
         assert interpretation1 is not None
 
         # Add new facts and reason again
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
         interpretation2 = pr.reason(timesteps=1)
         assert interpretation2 is not None
 
@@ -657,7 +657,7 @@ def test_filter_and_sort_functions_with_complex_scenario(self):
         # Add multiple rules
         pr.add_rule(Rule("friend(A, B) <- connected(A, B)", "rule1", False))
         pr.add_rule(Rule("close_friend(A, B) <- friend(A, B)", "rule2", False))
-        pr.add_fact(Fact('person("A")', 'fact1', 0, 2))
+        pr.add_fact(Fact('person(A)', 'fact1', 0, 2))
 
         pr.settings.store_interpretation_changes = True
         interpretation = pr.reason(timesteps=3)
@@ -736,7 +736,7 @@ def test_reason_with_memory_profile_enabled_again_true(self):
         assert interpretation1 is not None
 
         # Add facts for reason_again to work with
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Enable memory profiling
         pr.settings.memory_profile = True
@@ -776,7 +776,7 @@ def test_reason_with_memory_profile_disabled_again_true(self):
         assert interpretation1 is not None
 
         # Add facts for reason_again to work with
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Ensure memory profiling is disabled
         pr.settings.memory_profile = False
@@ -798,14 +798,14 @@ def test_reason_again_parameter_combinations(self):
         assert interpretation1 is not None
 
         # Add facts for reason_again calls
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Test again=True with restart=True (should use _reason_again)
         interpretation2 = pr.reason(timesteps=1, again=True, restart=True)
         assert interpretation2 is not None
 
         # Add more facts for next call
-        pr.add_fact(Fact('person("B")', 'person_b', 0, 1))
+        pr.add_fact(Fact('person(B)', 'person_b', 0, 1))
 
         # Test again=True with restart=False (should use _reason_again)
         interpretation3 = pr.reason(timesteps=1, again=True, restart=False)
@@ -841,7 +841,7 @@ def test_reason_again_internal_function_branches(self):
         assert interpretation1 is not None
 
         # Add some facts to test the fact extension logic in _reason_again
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # This should exercise the fact extension branches in _reason_again
         interpretation2 = pr.reason(timesteps=2, again=True, restart=True)
@@ -859,7 +859,7 @@ def test_reason_again_with_verbose_setting(self):
         assert interpretation1 is not None
 
         # Add facts for reason_again
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Enable verbose mode
         pr.settings.verbose = True
@@ -899,15 +899,15 @@ def test_reason_with_all_parameter_combinations(self):
             pr.settings.memory_profile = False
 
         # Add facts for again=True tests
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Test case 3: again=True, memory_profile=False
         pr.settings.memory_profile = False
         interpretation3 = pr.reason(timesteps=1, again=True)
         assert interpretation3 is not None
 
         # Add more facts for next test
-        pr.add_fact(Fact('person("B")', 'person_b', 0, 1))
+        pr.add_fact(Fact('person(B)', 'person_b', 0, 1))
 
         # Test case 4: again=True, memory_profile=True
         pr.settings.memory_profile = True
@@ -933,7 +933,7 @@ def test_reason_again_assert_coverage(self):
         assert interpretation1 is not None
 
         # Add facts for reason_again
-        pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
+        pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
 
         # Now the assert in _reason_again should pass
         interpretation2 = pr.reason(timesteps=1, again=True)
diff --git a/tests/unit/dont_disable_jit/test_rule_parser.py b/tests/unit/dont_disable_jit/test_rule_parser.py
@@ -653,7 +653,7 @@ def test_head_predicate_starts_with_digit(self):
     def test_head_predicate_invalid_chars(self):
         """Head predicate with invalid chars raises ValueError."""
         with pytest.raises(ValueError, match="invalid characters"):
-            parse_rule("pred-name(X) <- b(X)", "r", None)
+            parse_rule("pred!name(X) <- b(X)", "r", None)
 
     def test_body_predicate_starts_with_digit(self):
         """Body predicate starting with digit raises ValueError."""
@@ -663,7 +663,7 @@ def test_body_predicate_starts_with_digit(self):
     def test_body_predicate_invalid_chars(self):
         """Body predicate with invalid chars raises ValueError."""
         with pytest.raises(ValueError, match="invalid characters"):
-            parse_rule("p(X) <- body-name(X)", "r", None)
+            parse_rule("p(X) <- body!name(X)", "r", None)
 
     def test_double_negation_head(self):
         """Double negation in head raises ValueError."""
@@ -675,15 +675,10 @@ def test_double_negation_body(self):
         with pytest.raises(ValueError, match="Double negation"):
             parse_rule("p(X) <- ~~b(X)", "r", None)
 
-    def test_head_variable_starts_with_digit(self):
-        """Head variable starting with digit raises ValueError."""
-        with pytest.raises(ValueError, match="digit"):
-            parse_rule("p(1X) <- b(Y)", "r", None)
-
     def test_body_variable_invalid_chars(self):
         """Body variable with invalid chars raises ValueError."""
         with pytest.raises(ValueError, match="invalid characters"):
-            parse_rule("p(X) <- b(X-Y)", "r", None)
+            parse_rule("p(X) <- b(X!Y)", "r", None)
 
     def test_empty_head_parentheses(self):
         """Empty head parentheses raises ValueError."""