Skip to content

Commit c7ff782

Browse files
authored
Merge pull request #139 from lab-v2/update_parsing_logic
Relax parsing requirements to allow hyphens and periods
2 parents 0266a8b + 4a6f4d5 commit c7ff782

5 files changed

Lines changed: 52 additions & 51 deletions

File tree

pyreason/scripts/utils/fact_parser.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,28 @@
11
import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
22
import re
33

4+
_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*$')
5+
_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*$')
6+
7+
8+
def _validate_predicate(name):
9+
"""Validate that a predicate name starts with a letter/underscore."""
10+
if not name:
11+
raise ValueError("Predicate name cannot be empty")
12+
if not _PREDICATE_RE.match(name):
13+
if name[0].isdigit():
14+
raise ValueError(f"Predicate name '{name}' cannot start with a digit. Must start with a letter or underscore")
15+
else:
16+
raise ValueError(f"Predicate name '{name}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")
17+
18+
19+
def _validate_component(name, context):
20+
"""Validate that a component (entity) name contains only valid characters. May start with a digit."""
21+
if not name:
22+
raise ValueError(f"{context} name cannot be empty")
23+
if not _COMPONENT_RE.match(name):
24+
raise ValueError(f"{context} name '{name}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")
25+
426

527
# Input validation work was implemented with the help of Claude Sonnet 4.5.
628
def parse_fact(fact_text):
@@ -75,28 +97,13 @@ def parse_fact(fact_text):
7597
pred = pred_comp[:idx]
7698
component = pred_comp[idx + 1:-1]
7799

78-
# Validate predicate is not empty
79-
if not pred:
80-
raise ValueError("Predicate cannot be empty")
81-
82-
# Validate predicate contains only valid characters (alphanumeric and underscore)
83-
# Predicates must start with a letter or underscore (like Python identifiers)
84-
if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', pred):
85-
if pred[0].isdigit():
86-
raise ValueError(f"Predicate '{pred}' cannot start with a digit. Must start with a letter or underscore")
87-
else:
88-
raise ValueError(f"Predicate '{pred}' contains invalid characters. Only letters, digits, and underscores allowed, must start with letter or underscore")
100+
# Validate predicate name
101+
_validate_predicate(pred)
89102

90103
# Validate component is not empty
91104
if not component:
92105
raise ValueError("Component cannot be empty")
93106

94-
# Check for invalid characters in component
95-
if '(' in component or ')' in component:
96-
raise ValueError("Component cannot contain parentheses")
97-
if ':' in component:
98-
raise ValueError("Component cannot contain colons")
99-
100107
# Check if it is a node or edge fact
101108
if ',' in component:
102109
fact_type = 'edge'
@@ -106,14 +113,14 @@ def parse_fact(fact_text):
106113
if len(components) != 2:
107114
raise ValueError(f"Edge facts must have exactly 2 components, found {len(components)}")
108115

109-
# Validate no empty components
116+
# Validate component names
110117
for i, comp in enumerate(components):
111-
if not comp:
112-
raise ValueError(f"Component {i+1} in edge fact cannot be empty")
118+
_validate_component(comp, f"Edge component {i+1}")
113119

114120
component = tuple(components)
115121
else:
116122
fact_type = 'node'
123+
_validate_component(component, "Node component")
117124

118125
# Check if bound is a boolean or a list of floats
119126
if bound.lower() == 'true':

pyreason/scripts/utils/rule_parser.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import pyreason.scripts.numba_wrapper.numba_types.interval_type as interval
1111
from pyreason.scripts.threshold.threshold import Threshold
1212

13-
_IDENTIFIER_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
13+
_PREDICATE_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_.\-]*$')
14+
_COMPONENT_RE = re.compile(r'^[a-zA-Z0-9_][a-zA-Z0-9_.@\-]*$')
1415

1516

1617
def parse_rule(rule_text: str, name: str, custom_thresholds: Union[None, list, dict], infer_edges: bool = False, set_static: bool = False, weights: Union[None, np.ndarray] = None) -> rule.Rule:
@@ -482,19 +483,17 @@ def _parse_head_arguments(head_args_str):
482483

483484

484485
def _validate_predicate_name(pred, context):
    """Validate a predicate name used in a rule.

    A valid name starts with a letter or underscore and continues with
    letters, digits, underscores, periods, or hyphens only.

    Args:
        pred: The predicate name to check.
        context: Label used as the prefix of the error message.

    Raises:
        ValueError: If ``pred`` starts with a digit, or otherwise fails the
            pattern (an empty name is reported as containing invalid
            characters).
    """
    # fullmatch() rather than match(): '$' also matches just before a trailing
    # newline, so match() would let a name such as 'pred\n' through.
    if _PREDICATE_RE.fullmatch(pred):
        return
    if pred and pred[0].isdigit():
        raise ValueError(f"{context} predicate name '{pred}' cannot start with a digit")
    raise ValueError(f"{context} predicate name '{pred}' contains invalid characters. Must match [a-zA-Z_][a-zA-Z0-9_.\\-]*")
490491

491492

492493
def _validate_component_name(var, context):
    """Validate a component (variable or entity) name used in a rule.

    A valid name starts with a letter, digit, or underscore and continues
    with letters, digits, underscores, periods, '@', or hyphens only.

    Args:
        var: The component name to check.
        context: Label used as the prefix of the error message.

    Raises:
        ValueError: If ``var`` fails the pattern (an empty name is reported
            as containing invalid characters).
    """
    # fullmatch() rather than match(): '$' would otherwise tolerate a trailing
    # newline at the end of the name.
    if not _COMPONENT_RE.fullmatch(var):
        raise ValueError(f"{context} component name '{var}' contains invalid characters. Must match [a-zA-Z0-9_][a-zA-Z0-9_.@\\-]*")
498497

499498

500499
def _str_bound_to_bound(str_bound):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
setup(
1010
name='pyreason',
11-
version='3.5.0',
11+
version='3.5.1',
1212
author='Dyuman Aditya',
1313
author_email='dyuman.aditya@gmail.com',
1414
description='An explainable inference software supporting annotated, real valued, graph based and temporal logic',

tests/api_tests/test_pyreason_reasoning.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_reason_with_existing_specific_node_labels(self):
149149
pr.add_rule(Rule("friend(A, B) <- person(A), person(B)", "test_rule", False))
150150

151151
# Add some facts first to create existing specific labels
152-
pr.add_fact(Fact('person("C")', 'person_c', 0, 1))
152+
pr.add_fact(Fact('person(C)', 'person_c', 0, 1))
153153

154154
interpretation = pr.reason(timesteps=1)
155155
assert interpretation is not None
@@ -292,7 +292,7 @@ def test_reason_clears_facts_after_reasoning(self):
292292
pr.add_rule(Rule("friend(A, B) <- connected(A, B)", "test_rule", False))
293293

294294
# Add some facts
295-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
295+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
296296

297297
interpretation = pr.reason(timesteps=1)
298298
assert interpretation is not None
@@ -352,7 +352,7 @@ def test_multiple_reason_calls(self):
352352
assert interpretation1 is not None
353353

354354
# Add new facts and reason again
355-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
355+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
356356
interpretation2 = pr.reason(timesteps=1)
357357
assert interpretation2 is not None
358358

@@ -657,7 +657,7 @@ def test_filter_and_sort_functions_with_complex_scenario(self):
657657
# Add multiple rules
658658
pr.add_rule(Rule("friend(A, B) <- connected(A, B)", "rule1", False))
659659
pr.add_rule(Rule("close_friend(A, B) <- friend(A, B)", "rule2", False))
660-
pr.add_fact(Fact('person("A")', 'fact1', 0, 2))
660+
pr.add_fact(Fact('person(A)', 'fact1', 0, 2))
661661

662662
pr.settings.store_interpretation_changes = True
663663
interpretation = pr.reason(timesteps=3)
@@ -736,7 +736,7 @@ def test_reason_with_memory_profile_enabled_again_true(self):
736736
assert interpretation1 is not None
737737

738738
# Add facts for reason_again to work with
739-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
739+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
740740

741741
# Enable memory profiling
742742
pr.settings.memory_profile = True
@@ -776,7 +776,7 @@ def test_reason_with_memory_profile_disabled_again_true(self):
776776
assert interpretation1 is not None
777777

778778
# Add facts for reason_again to work with
779-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
779+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
780780

781781
# Ensure memory profiling is disabled
782782
pr.settings.memory_profile = False
@@ -798,14 +798,14 @@ def test_reason_again_parameter_combinations(self):
798798
assert interpretation1 is not None
799799

800800
# Add facts for reason_again calls
801-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
801+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
802802

803803
# Test again=True with restart=True (should use _reason_again)
804804
interpretation2 = pr.reason(timesteps=1, again=True, restart=True)
805805
assert interpretation2 is not None
806806

807807
# Add more facts for next call
808-
pr.add_fact(Fact('person("B")', 'person_b', 0, 1))
808+
pr.add_fact(Fact('person(B)', 'person_b', 0, 1))
809809

810810
# Test again=True with restart=False (should use _reason_again)
811811
interpretation3 = pr.reason(timesteps=1, again=True, restart=False)
@@ -841,7 +841,7 @@ def test_reason_again_internal_function_branches(self):
841841
assert interpretation1 is not None
842842

843843
# Add some facts to test the fact extension logic in _reason_again
844-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
844+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
845845

846846
# This should exercise the fact extension branches in _reason_again
847847
interpretation2 = pr.reason(timesteps=2, again=True, restart=True)
@@ -859,7 +859,7 @@ def test_reason_again_with_verbose_setting(self):
859859
assert interpretation1 is not None
860860

861861
# Add facts for reason_again
862-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
862+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
863863

864864
# Enable verbose mode
865865
pr.settings.verbose = True
@@ -899,15 +899,15 @@ def test_reason_with_all_parameter_combinations(self):
899899
pr.settings.memory_profile = False
900900

901901
# Add facts for again=True tests
902-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
902+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
903903

904904
# Test case 3: again=True, memory_profile=False
905905
pr.settings.memory_profile = False
906906
interpretation3 = pr.reason(timesteps=1, again=True)
907907
assert interpretation3 is not None
908908

909909
# Add more facts for next test
910-
pr.add_fact(Fact('person("B")', 'person_b', 0, 1))
910+
pr.add_fact(Fact('person(B)', 'person_b', 0, 1))
911911

912912
# Test case 4: again=True, memory_profile=True
913913
pr.settings.memory_profile = True
@@ -933,7 +933,7 @@ def test_reason_again_assert_coverage(self):
933933
assert interpretation1 is not None
934934

935935
# Add facts for reason_again
936-
pr.add_fact(Fact('person("A")', 'person_a', 0, 1))
936+
pr.add_fact(Fact('person(A)', 'person_a', 0, 1))
937937

938938
# Now the assert in _reason_again should pass
939939
interpretation2 = pr.reason(timesteps=1, again=True)

tests/unit/dont_disable_jit/test_rule_parser.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ def test_head_predicate_starts_with_digit(self):
653653
def test_head_predicate_invalid_chars(self):
654654
"""Head predicate with invalid chars raises ValueError."""
655655
with pytest.raises(ValueError, match="invalid characters"):
656-
parse_rule("pred-name(X) <- b(X)", "r", None)
656+
parse_rule("pred!name(X) <- b(X)", "r", None)
657657

658658
def test_body_predicate_starts_with_digit(self):
659659
"""Body predicate starting with digit raises ValueError."""
@@ -663,7 +663,7 @@ def test_body_predicate_starts_with_digit(self):
663663
def test_body_predicate_invalid_chars(self):
664664
"""Body predicate with invalid chars raises ValueError."""
665665
with pytest.raises(ValueError, match="invalid characters"):
666-
parse_rule("p(X) <- body-name(X)", "r", None)
666+
parse_rule("p(X) <- body!name(X)", "r", None)
667667

668668
def test_double_negation_head(self):
669669
"""Double negation in head raises ValueError."""
@@ -675,15 +675,10 @@ def test_double_negation_body(self):
675675
with pytest.raises(ValueError, match="Double negation"):
676676
parse_rule("p(X) <- ~~b(X)", "r", None)
677677

678-
def test_head_variable_starts_with_digit(self):
679-
"""Head variable starting with digit raises ValueError."""
680-
with pytest.raises(ValueError, match="digit"):
681-
parse_rule("p(1X) <- b(Y)", "r", None)
682-
683678
def test_body_variable_invalid_chars(self):
684679
"""Body variable with invalid chars raises ValueError."""
685680
with pytest.raises(ValueError, match="invalid characters"):
686-
parse_rule("p(X) <- b(X-Y)", "r", None)
681+
parse_rule("p(X) <- b(X!Y)", "r", None)
687682

688683
def test_empty_head_parentheses(self):
689684
"""Empty head parentheses raises ValueError."""

0 commit comments

Comments
 (0)