Skip to content

Commit 4e0a0f2

Browse files
committed
refactor: replace custom text analyzer method with a direct function call
1 parent 83c2431 commit 4e0a0f2

3 files changed

Lines changed: 42 additions & 55 deletions

File tree

integration/test_collection_config.py

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2218,7 +2218,7 @@ def test_property_text_analyzer_ascii_fold_version_gate(
22182218
name="title",
22192219
data_type=DataType.TEXT,
22202220
tokenization=Tokenization.WORD,
2221-
text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True),
2221+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True),
22222222
),
22232223
],
22242224
)
@@ -2245,14 +2245,14 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N
22452245
name="title_fr",
22462246
data_type=DataType.TEXT,
22472247
tokenization=Tokenization.WORD,
2248-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2248+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
22492249
),
22502250
# Built-in English preset, set per property.
22512251
Property(
22522252
name="title_en",
22532253
data_type=DataType.TEXT,
22542254
tokenization=Tokenization.WORD,
2255-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN),
2255+
text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN),
22562256
),
22572257
# No stopword override → uses the collection-level default.
22582258
Property(
@@ -2292,7 +2292,7 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor
22922292
name="title_fr",
22932293
data_type=DataType.TEXT,
22942294
tokenization=Tokenization.WORD,
2295-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2295+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
22962296
),
22972297
],
22982298
)
@@ -2328,7 +2328,7 @@ def test_collection_stopword_presets_remove_in_use_is_rejected(
23282328
name="title_fr",
23292329
data_type=DataType.TEXT,
23302330
tokenization=Tokenization.WORD,
2331-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2331+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
23322332
),
23332333
],
23342334
)
@@ -2381,7 +2381,7 @@ def test_collection_stopword_presets_remove_unused_is_allowed(
23812381
name="title",
23822382
data_type=DataType.TEXT,
23832383
tokenization=Tokenization.WORD,
2384-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2384+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
23852385
),
23862386
],
23872387
)
@@ -2418,7 +2418,7 @@ def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rej
24182418
name="body",
24192419
data_type=DataType.TEXT,
24202420
tokenization=Tokenization.WORD,
2421-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2421+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
24222422
),
24232423
],
24242424
),
@@ -2453,7 +2453,7 @@ def test_collection_user_defined_stopword_preset_overrides_builtin(
24532453
name="title",
24542454
data_type=DataType.TEXT,
24552455
tokenization=Tokenization.WORD,
2456-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="en"),
2456+
text_analyzer=Configure.TextAnalyzer(stopword_preset="en"),
24572457
),
24582458
],
24592459
)
@@ -2480,7 +2480,7 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset(
24802480
name="title",
24812481
data_type=DataType.TEXT,
24822482
tokenization=Tokenization.WORD,
2483-
text_analyzer=Configure.TextAnalyzer.custom(
2483+
text_analyzer=Configure.TextAnalyzer(
24842484
ascii_fold=True,
24852485
stopword_preset=StopwordsPreset.EN,
24862486
),
@@ -2514,9 +2514,7 @@ def test_property_text_analyzer_ascii_fold_immutable(
25142514
name="title",
25152515
data_type=DataType.TEXT,
25162516
tokenization=Tokenization.WORD,
2517-
text_analyzer=Configure.TextAnalyzer.custom(
2518-
ascii_fold=True, ascii_fold_ignore=["é"]
2519-
),
2517+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é"]),
25202518
),
25212519
],
25222520
)
@@ -2535,7 +2533,7 @@ def test_property_text_analyzer_ascii_fold_immutable(
25352533
name="title2",
25362534
data_type=DataType.TEXT,
25372535
tokenization=Tokenization.WORD,
2538-
text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["ñ"]),
2536+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]),
25392537
),
25402538
)
25412539
config = collection.config.get()
@@ -2589,7 +2587,7 @@ def test_stopword_presets_roundtrip_from_dict(
25892587
name="title",
25902588
data_type=DataType.TEXT,
25912589
tokenization=Tokenization.WORD,
2592-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
2590+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
25932591
),
25942592
],
25952593
)
@@ -2625,7 +2623,7 @@ def test_text_analyzer_roundtrip_from_dict(
26252623
name="title",
26262624
data_type=DataType.TEXT,
26272625
tokenization=Tokenization.WORD,
2628-
text_analyzer=Configure.TextAnalyzer.custom(
2626+
text_analyzer=Configure.TextAnalyzer(
26292627
ascii_fold=True,
26302628
ascii_fold_ignore=["é"],
26312629
stopword_preset=StopwordsPreset.EN,

test/collection/test_config.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3035,7 +3035,7 @@ def test_property_with_ascii_fold_only(self) -> None:
30353035
prop = Property(
30363036
name="title",
30373037
data_type=DataType.TEXT,
3038-
text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True),
3038+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True),
30393039
)
30403040
assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True}
30413041

@@ -3044,9 +3044,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None:
30443044
name="title",
30453045
data_type=DataType.TEXT,
30463046
tokenization=Tokenization.WORD,
3047-
text_analyzer=Configure.TextAnalyzer.custom(
3048-
ascii_fold=True, ascii_fold_ignore=["é", "ñ"]
3049-
),
3047+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]),
30503048
)
30513049
out = prop._to_dict()
30523050
assert out["textAnalyzer"] == {
@@ -3067,9 +3065,7 @@ def test_nested_property_with_text_analyzer(self) -> None:
30673065
Property(
30683066
name="title",
30693067
data_type=DataType.TEXT,
3070-
text_analyzer=Configure.TextAnalyzer.custom(
3071-
ascii_fold=True, ascii_fold_ignore=["ñ"]
3072-
),
3068+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]),
30733069
),
30743070
],
30753071
)
@@ -3090,7 +3086,7 @@ def test_text_analyzer_stopword_preset_builtin_enum(self) -> None:
30903086
name="title",
30913087
data_type=DataType.TEXT,
30923088
tokenization=Tokenization.WORD,
3093-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN),
3089+
text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN),
30943090
)
30953091
assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"}
30963092

@@ -3099,7 +3095,7 @@ def test_text_analyzer_stopword_preset_user_defined_string(self) -> None:
30993095
name="title_fr",
31003096
data_type=DataType.TEXT,
31013097
tokenization=Tokenization.WORD,
3102-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
3098+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
31033099
)
31043100
assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"}
31053101

@@ -3108,7 +3104,7 @@ def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None:
31083104
name="title",
31093105
data_type=DataType.TEXT,
31103106
tokenization=Tokenization.WORD,
3111-
text_analyzer=Configure.TextAnalyzer.custom(
3107+
text_analyzer=Configure.TextAnalyzer(
31123108
ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr"
31133109
),
31143110
)
@@ -3123,7 +3119,7 @@ def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None:
31233119
name="title",
31243120
data_type=DataType.TEXT,
31253121
tokenization=Tokenization.WORD,
3126-
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"),
3122+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
31273123
)
31283124
out = prop._to_dict()
31293125
assert "asciiFold" not in out["textAnalyzer"]

weaviate/collections/classes/config.py

Lines changed: 22 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -2223,35 +2223,28 @@ def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate":
22232223
return self
22242224

22252225

2226-
class _TextAnalyzer:
2227-
"""Factory class for creating text analyzer configurations.
2228-
2229-
Use ``Configure.TextAnalyzer`` to access these methods.
2226+
def _text_analyzer(
2227+
ascii_fold: Optional[bool] = None,
2228+
ascii_fold_ignore: Optional[List[str]] = None,
2229+
stopword_preset: Optional[Union[StopwordsPreset, str]] = None,
2230+
) -> _TextAnalyzerConfigCreate:
2231+
"""Create a text analyzer config for a property.
2232+
2233+
Args:
2234+
ascii_fold: If True, accent/diacritic marks are folded to their base
2235+
characters during indexing and search (e.g. 'école' matches 'ecole').
2236+
ascii_fold_ignore: Optional list of characters that should be excluded
2237+
from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to
2238+
'e'). Requires ``ascii_fold=True``.
2239+
stopword_preset: Stopword preset name to override the collection-level
2240+
stopwords for this property. Accepts a ``StopwordsPreset`` or a
2241+
user-defined preset name.
22302242
"""
2231-
2232-
@staticmethod
2233-
def custom(
2234-
ascii_fold: Optional[bool] = None,
2235-
ascii_fold_ignore: Optional[List[str]] = None,
2236-
stopword_preset: Optional[Union[StopwordsPreset, str]] = None,
2237-
) -> _TextAnalyzerConfigCreate:
2238-
"""Create a text analyzer config with custom settings.
2239-
2240-
Args:
2241-
ascii_fold: If True, accent/diacritic marks are folded to their base
2242-
characters during indexing and search (e.g. 'école' matches 'ecole').
2243-
ascii_fold_ignore: Optional list of characters that should be excluded
2244-
from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to
2245-
'e'). Requires ``ascii_fold=True``.
2246-
stopword_preset: Stopword preset name to override the collection-level
2247-
stopwords for this property. Accepts a ``StopwordsPreset`` or a
2248-
user-defined preset name.
2249-
"""
2250-
return _TextAnalyzerConfigCreate(
2251-
ascii_fold=ascii_fold,
2252-
ascii_fold_ignore=ascii_fold_ignore,
2253-
stopword_preset=stopword_preset,
2254-
)
2243+
return _TextAnalyzerConfigCreate(
2244+
ascii_fold=ascii_fold,
2245+
ascii_fold_ignore=ascii_fold_ignore,
2246+
stopword_preset=stopword_preset,
2247+
)
22552248

22562249

22572250
class Property(_ConfigCreateModel):
@@ -2665,7 +2658,7 @@ class Configure:
26652658
MultiVectors = _MultiVectors
26662659
ObjectTTL = _ObjectTTL
26672660
Replication = _Replication
2668-
TextAnalyzer = _TextAnalyzer
2661+
TextAnalyzer = staticmethod(_text_analyzer)
26692662

26702663
@staticmethod
26712664
def inverted_index(

0 commit comments

Comments
 (0)