Skip to content

Commit 8e91984

Browse files
committed
refactor: update TextAnalyzerConfig usage to new Configure class methods
1 parent 7018927, commit 8e91984

3 files changed

Lines changed: 30 additions & 39 deletions

File tree

integration/test_collection_config.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
PQEncoderType,
3131
PQEncoderDistribution,
3232
StopwordsPreset,
33-
TextAnalyzerConfig,
3433
VectorDistances,
3534
VectorIndexType,
3635
Vectorizers,
@@ -2219,7 +2218,7 @@ def test_property_text_analyzer_ascii_fold_version_gate(
22192218
name="title",
22202219
data_type=DataType.TEXT,
22212220
tokenization=Tokenization.WORD,
2222-
text_analyzer=TextAnalyzerConfig(ascii_fold=True),
2221+
text_analyzer=Configure.TextAnalyzer.ascii_fold(),
22232222
),
22242223
],
22252224
)

test/collection/test_config.py

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,20 @@
44
from pydantic import ValidationError
55

66
from weaviate.collections.classes.config import (
7-
_AsyncReplicationConfig,
8-
_ReplicationConfig,
9-
_ReplicationConfigUpdate,
107
Configure,
118
DataType,
129
Property,
1310
Reconfigure,
1411
ReferenceProperty,
15-
TextAnalyzerConfig,
1612
Tokenization,
1713
Vectorizers,
14+
_AsyncReplicationConfig,
1815
_CollectionConfigCreate,
1916
_GenerativeProvider,
17+
_ReplicationConfig,
18+
_ReplicationConfigUpdate,
2019
_RerankerProvider,
20+
_TextAnalyzerConfigCreate,
2121
_VectorizerConfigCreate,
2222
_ReplicationConfigCreate,
2323
ReplicationDeletionStrategy,
@@ -3025,7 +3025,7 @@ def test_nested_property_with_id_name_is_allowed() -> None:
30253025
assert prop.nestedProperties[0].name == "id"
30263026

30273027

3028-
class TestTextAnalyzerConfig:
3028+
class Test_TextAnalyzerConfigCreate:
30293029
def test_property_without_text_analyzer_omits_key(self) -> None:
30303030
prop = Property(name="title", data_type=DataType.TEXT)
30313031
assert "textAnalyzer" not in prop._to_dict()
@@ -3034,7 +3034,7 @@ def test_property_with_ascii_fold_only(self) -> None:
30343034
prop = Property(
30353035
name="title",
30363036
data_type=DataType.TEXT,
3037-
text_analyzer=TextAnalyzerConfig(ascii_fold=True),
3037+
text_analyzer=Configure.TextAnalyzer.ascii_fold(),
30383038
)
30393039
assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True}
30403040

@@ -3043,7 +3043,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None:
30433043
name="title",
30443044
data_type=DataType.TEXT,
30453045
tokenization=Tokenization.WORD,
3046-
text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]),
3046+
text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["é", "ñ"]),
30473047
)
30483048
out = prop._to_dict()
30493049
assert out["textAnalyzer"] == {
@@ -3052,22 +3052,9 @@ def test_property_with_ascii_fold_and_ignore(self) -> None:
30523052
}
30533053
assert out["tokenization"] == "word"
30543054

3055-
def test_text_analyzer_default_omits_unset_fields(self) -> None:
3056-
prop = Property(
3057-
name="title",
3058-
data_type=DataType.TEXT,
3059-
text_analyzer=TextAnalyzerConfig(),
3060-
)
3061-
# exclude_none drops both unset fields, leaving an empty dict
3062-
assert prop._to_dict()["textAnalyzer"] == {}
3063-
3064-
def test_text_analyzer_only_ignore_list(self) -> None:
3065-
prop = Property(
3066-
name="title",
3067-
data_type=DataType.TEXT,
3068-
text_analyzer=TextAnalyzerConfig(ascii_fold_ignore=["é"]),
3069-
)
3070-
assert prop._to_dict()["textAnalyzer"] == {"asciiFoldIgnore": ["é"]}
3055+
def test_text_analyzer_rejects_ignore_without_ascii_fold(self) -> None:
3056+
with pytest.raises(ValidationError):
3057+
_TextAnalyzerConfigCreate(ascii_fold_ignore=["é"])
30713058

30723059
def test_nested_property_with_text_analyzer(self) -> None:
30733060
prop = Property(
@@ -3077,7 +3064,7 @@ def test_nested_property_with_text_analyzer(self) -> None:
30773064
Property(
30783065
name="title",
30793066
data_type=DataType.TEXT,
3080-
text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]),
3067+
text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["ñ"]),
30813068
),
30823069
],
30833070
)
@@ -3087,13 +3074,8 @@ def test_nested_property_with_text_analyzer(self) -> None:
30873074
"asciiFoldIgnore": ["ñ"],
30883075
}
30893076

3090-
def test_text_analyzer_accepts_snake_case_alias(self) -> None:
3091-
ta = TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"])
3092-
assert ta.asciiFold is True
3093-
assert ta.asciiFoldIgnore == ["é"]
3094-
30953077
def test_text_analyzer_rejects_wrong_types(self) -> None:
30963078
with pytest.raises(ValidationError):
3097-
TextAnalyzerConfig(ascii_fold="yes") # type: ignore[arg-type]
3079+
_TextAnalyzerConfigCreate(ascii_fold="yes") # type: ignore[arg-type]
30983080
with pytest.raises(ValidationError):
3099-
TextAnalyzerConfig(ascii_fold_ignore="é") # type: ignore[arg-type]
3081+
_TextAnalyzerConfigCreate(ascii_fold_ignore="é") # type: ignore[arg-type]

weaviate/collections/classes/config.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,14 @@
1515
)
1616

1717
from deprecation import deprecated as docstring_deprecated
18-
from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, field_validator, model_validator
18+
from pydantic import (
19+
AnyHttpUrl,
20+
Field,
21+
TypeAdapter,
22+
ValidationInfo,
23+
field_validator,
24+
model_validator,
25+
)
1926
from typing_extensions import TypeAlias
2027
from typing_extensions import deprecated as typing_deprecated
2128

@@ -1677,6 +1684,9 @@ class _TextAnalyzerConfig(_ConfigBase):
16771684
ascii_fold_ignore: Optional[List[str]]
16781685

16791686

1687+
TextAnalyzerConfig = _TextAnalyzerConfig
1688+
1689+
16801690
@dataclass
16811691
class _NestedProperty(_ConfigBase):
16821692
data_type: DataType
@@ -2171,7 +2181,7 @@ class _ShardStatus:
21712181
ShardStatus = _ShardStatus
21722182

21732183

2174-
class TextAnalyzerConfig(_ConfigCreateModel):
2184+
class _TextAnalyzerConfigCreate(_ConfigCreateModel):
21752185
"""Text analysis options for a property.
21762186
21772187
Configures ASCII folding behavior for `text` and `text[]` properties that use an
@@ -2194,7 +2204,7 @@ class TextAnalyzerConfig(_ConfigCreateModel):
21942204
asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore")
21952205

21962206
@model_validator(mode="after")
2197-
def _validate_ascii_fold_ignore(self) -> "TextAnalyzerConfig":
2207+
def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate":
21982208
if self.asciiFold is not True and self.asciiFoldIgnore is not None:
21992209
raise ValueError("asciiFoldIgnore cannot be set when asciiFold is not enabled")
22002210
return self
@@ -2209,14 +2219,14 @@ class _TextAnalyzer:
22092219
@staticmethod
22102220
def ascii_fold(
22112221
ignore: Optional[List[str]] = None,
2212-
) -> _TextAnalyzerConfig:
2222+
) -> _TextAnalyzerConfigCreate:
22132223
"""Create a text analyzer config with ASCII folding enabled.
22142224
22152225
Args:
22162226
ignore: Optional list of characters that should be excluded from
22172227
ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to 'e').
22182228
"""
2219-
return _TextAnalyzerConfig(asciiFold=True, asciiFoldIgnore=ignore)
2229+
return _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=ignore)
22202230

22212231

22222232
class Property(_ConfigCreateModel):
@@ -2248,7 +2258,7 @@ class Property(_ConfigCreateModel):
22482258
default=None, alias="nested_properties"
22492259
)
22502260
skip_vectorization: bool = Field(default=False)
2251-
textAnalyzer: Optional[TextAnalyzerConfig] = Field(default=None, alias="text_analyzer")
2261+
textAnalyzer: Optional[_TextAnalyzerConfigCreate] = Field(default=None, alias="text_analyzer")
22522262
tokenization: Optional[Tokenization] = Field(default=None)
22532263
vectorize_property_name: bool = Field(default=True)
22542264

0 commit comments

Comments
 (0)