weaviate · dirkkul · Apr 14, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py
@@ -42,7 +42,11 @@
     IndexName,
 )
 from weaviate.collections.classes.tenants import Tenant
-from weaviate.exceptions import UnexpectedStatusCodeError, WeaviateInvalidInputError
+from weaviate.exceptions import (
+    UnexpectedStatusCodeError,
+    WeaviateInvalidInputError,
+    WeaviateUnsupportedFeatureError,
+)
 from integration.conftest import retry_on_http_error
 
 
@@ -2196,3 +2200,48 @@ def test_delete_property_index(
         assert config.properties[0].index_range_filters is False
         assert config.properties[0].index_searchable is _index_searchable
         assert config.properties[0].index_filterable is _index_filterable
+
+
+def test_property_text_analyzer_ascii_fold_version_gate(
+    collection_factory: CollectionFactory,
+) -> None:
+    """On Weaviate < 1.37 the client must raise before sending the request."""
+    dummy = collection_factory("dummy")  # only used to read the server version
+    if dummy._connection._weaviate_version.is_at_least(1, 37, 0):
+        pytest.skip("Version gate only applies to Weaviate < 1.37.0")
+
+    with pytest.raises(WeaviateUnsupportedFeatureError):  # only the error type is checked
+        collection_factory(
+            vectorizer_config=Configure.Vectorizer.none(),
+            properties=[
+                Property(
+                    name="title",
+                    data_type=DataType.TEXT,
+                    tokenization=Tokenization.WORD,
+                    text_analyzer=Configure.TextAnalyzer.ascii_fold(),  # gated feature
+                ),
+            ],
+        )
+
+
+def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> None:
+    collection = collection_factory(
+        inverted_index_config=Configure.inverted_index(
+            stopwords_additions=["a"],  # NOTE(review): not asserted directly below
+            stopwords_preset=StopwordsPreset.EN,
+            stopwords_removals=["the"],
+        ),
+    )
+    config = collection.config.get()
+    assert config.inverted_index_config.stopwords.preset == StopwordsPreset.EN
+    assert config.inverted_index_config.stopwords.removals == ["the"]
+
+    name = f"TestStopwordsRoundtrip{collection.name}"
+    config.name = name  # recreate the exported config under a second collection name
+    with weaviate.connect_to_local() as client:
+        client.collections.delete(name)  # make sure the target name is free
+        client.collections.create_from_dict(config.to_dict())
+        new = client.collections.use(name).config.get()
+        assert config == new
+        assert config.to_dict() == new.to_dict()
+        client.collections.delete(name)  # NOTE(review): not reached if an assert above fails
diff --git a/test/collection/test_config.py b/test/collection/test_config.py
@@ -4,18 +4,20 @@
 from pydantic import ValidationError
 
 from weaviate.collections.classes.config import (
-    _AsyncReplicationConfig,
-    _ReplicationConfig,
-    _ReplicationConfigUpdate,
     Configure,
     DataType,
     Property,
     Reconfigure,
     ReferenceProperty,
+    Tokenization,
     Vectorizers,
+    _AsyncReplicationConfig,
     _CollectionConfigCreate,
     _GenerativeProvider,
+    _ReplicationConfig,
+    _ReplicationConfigUpdate,
     _RerankerProvider,
+    _TextAnalyzerConfigCreate,
     _VectorizerConfigCreate,
     _ReplicationConfigCreate,
     ReplicationDeletionStrategy,
@@ -3021,3 +3023,59 @@ def test_nested_property_with_id_name_is_allowed() -> None:
         ],
     )
     assert prop.nestedProperties[0].name == "id"
+
+
+class Test_TextAnalyzerConfigCreate:  # Property.text_analyzer serialization and validation
+    def test_property_without_text_analyzer_omits_key(self) -> None:
+        prop = Property(name="title", data_type=DataType.TEXT)
+        assert "textAnalyzer" not in prop._to_dict()
+
+    def test_property_with_ascii_fold_only(self) -> None:
+        prop = Property(
+            name="title",
+            data_type=DataType.TEXT,
+            text_analyzer=Configure.TextAnalyzer.ascii_fold(),
+        )
+        assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True}  # no asciiFoldIgnore key
+
+    def test_property_with_ascii_fold_and_ignore(self) -> None:
+        prop = Property(
+            name="title",
+            data_type=DataType.TEXT,
+            tokenization=Tokenization.WORD,
+            text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["é", "ñ"]),
+        )
+        out = prop._to_dict()
+        assert out["textAnalyzer"] == {
+            "asciiFold": True,
+            "asciiFoldIgnore": ["é", "ñ"],
+        }
+        assert out["tokenization"] == "word"  # tokenization is still serialized
+
+    def test_text_analyzer_rejects_ignore_without_ascii_fold(self) -> None:
+        with pytest.raises(ValidationError):
+            _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"])  # ascii_fold left unset
+
+    def test_nested_property_with_text_analyzer(self) -> None:
+        prop = Property(
+            name="meta",
+            data_type=DataType.OBJECT,
+            nested_properties=[
+                Property(
+                    name="title",
+                    data_type=DataType.TEXT,
+                    text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["ñ"]),
+                ),
+            ],
+        )
+        out = prop._to_dict()
+        assert out["nestedProperties"][0]["textAnalyzer"] == {
+            "asciiFold": True,
+            "asciiFoldIgnore": ["ñ"],
+        }
+
+    def test_text_analyzer_rejects_wrong_types(self) -> None:
+        with pytest.raises(ValidationError):  # str passed where a bool is declared
+            _TextAnalyzerConfigCreate(ascii_fold="yes")  # type: ignore[arg-type]
+        with pytest.raises(ValidationError):  # bare str passed where a list is declared
+            _TextAnalyzerConfigCreate(ascii_fold_ignore="é")  # type: ignore[arg-type]
diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py
@@ -1,4 +1,8 @@
-from weaviate.collections.classes.config_methods import _collection_configs_simple_from_json
+from weaviate.collections.classes.config_methods import (
+    _collection_configs_simple_from_json,
+    _nested_properties_from_config,
+    _properties_from_config,
+)
 
 
 def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None:
@@ -68,3 +72,76 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non
     assert "default" in vec_config
     assert vec_config["default"].vectorizer.model == {}
     assert vec_config["default"].vectorizer.source_properties is None
+
+
+def _make_text_prop(name: str, **extra) -> dict:  # raw property dict for the parser tests
+    base = {
+        "name": name,
+        "dataType": ["text"],
+        "indexFilterable": True,
+        "indexSearchable": True,
+        "indexRangeFilters": False,
+        "tokenization": "word",
+    }
+    base.update(extra)  # extra keys are added to, or override, the defaults above
+    return base
+
+
+def test_properties_from_config_parses_text_analyzer() -> None:
+    schema = {
+        "vectorizer": "none",
+        "properties": [
+            _make_text_prop(
+                "title",
+                textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["é"]},
+            ),
+            _make_text_prop("body"),  # no textAnalyzer key at all
+        ],
+    }
+    props = _properties_from_config(schema)
+    title = next(p for p in props if p.name == "title")
+    body = next(p for p in props if p.name == "body")
+
+    assert title.text_analyzer is not None
+    assert title.text_analyzer.ascii_fold is True
+    assert title.text_analyzer.ascii_fold_ignore == ["é"]
+
+    assert body.text_analyzer is None  # missing key parses to None
+
+    # The dataclass round-trips back to the wire format.
+    assert title.to_dict()["textAnalyzer"] == {
+        "asciiFold": True,
+        "asciiFoldIgnore": ["é"],
+    }
+    assert "textAnalyzer" not in body.to_dict()
+
+
+def test_properties_from_config_text_analyzer_omitted_when_no_ascii_fold() -> None:
+    """If the server response omits asciiFold, the client treats text_analyzer as unset."""
+    schema = {
+        "vectorizer": "none",
+        "properties": [
+            # Server response with textAnalyzer present but no asciiFold key
+            _make_text_prop("title", textAnalyzer={"asciiFoldIgnore": ["é"]}),
+        ],
+    }
+    title = _properties_from_config(schema)[0]
+    assert title.text_analyzer is None  # asciiFoldIgnore alone does not yield a config
+
+
+def test_nested_properties_from_config_parses_text_analyzer() -> None:
+    nested = _nested_properties_from_config(  # called with a bare list of property dicts
+        [
+            _make_text_prop(
+                "title",
+                textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["ñ"]},
+            ),
+        ]
+    )
+    assert nested[0].text_analyzer is not None
+    assert nested[0].text_analyzer.ascii_fold is True
+    assert nested[0].text_analyzer.ascii_fold_ignore == ["ñ"]
+    assert nested[0].to_dict()["textAnalyzer"] == {  # same wire keys as the input
+        "asciiFold": True,
+        "asciiFoldIgnore": ["ñ"],
+    }
diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py
@@ -12,6 +12,7 @@
     ReplicationDeletionStrategy,
     Rerankers,
     StopwordsPreset,
+    TextAnalyzerConfig,
     Tokenization,
     VectorDistances,
 )
@@ -39,6 +40,7 @@
     "ReferenceProperty",
     "Rerankers",
     "StopwordsPreset",
+    "TextAnalyzerConfig",
     "Tokenization",
     "Vectorizers",
     "VectorDistances",

diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
@@ -15,7 +15,14 @@
 )
 
 from deprecation import deprecated as docstring_deprecated
-from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, field_validator
+from pydantic import (
+    AnyHttpUrl,
+    Field,
+    TypeAdapter,
+    ValidationInfo,
+    field_validator,
+    model_validator,
+)
 from typing_extensions import TypeAlias
 from typing_extensions import deprecated as typing_deprecated
 
@@ -1671,6 +1678,15 @@ class _PropertyVectorizerConfig:
 PropertyVectorizerConfig = _PropertyVectorizerConfig
 
 
+@dataclass
+class _TextAnalyzerConfig(_ConfigBase):  # text analyzer settings read back from a property
+    ascii_fold: bool  # serialized as "asciiFold" by to_dict()
+    ascii_fold_ignore: Optional[List[str]]  # serialized as "asciiFoldIgnore" by to_dict()
+
+
+TextAnalyzerConfig = _TextAnalyzerConfig  # public alias, re-exported by weaviate.classes.config
+
+
 @dataclass
 class _NestedProperty(_ConfigBase):
     data_type: DataType
@@ -1679,6 +1695,7 @@ class _NestedProperty(_ConfigBase):
     index_searchable: bool
     name: str
     nested_properties: Optional[List["NestedProperty"]]
+    text_analyzer: Optional[_TextAnalyzerConfig]
     tokenization: Optional[Tokenization]
 
     def to_dict(self) -> Dict[str, Any]:
@@ -1712,6 +1729,7 @@ class _Property(_PropertyBase):
     index_range_filters: bool
     index_searchable: bool
     nested_properties: Optional[List[NestedProperty]]
+    text_analyzer: Optional[_TextAnalyzerConfig]
     tokenization: Optional[Tokenization]
     vectorizer_config: Optional[PropertyVectorizerConfig]
     vectorizer: Optional[str]
@@ -1724,6 +1742,8 @@ def to_dict(self) -> Dict[str, Any]:
         out["indexSearchable"] = self.index_searchable
         out["indexRangeFilters"] = self.index_range_filters
         out["tokenization"] = self.tokenization.value if self.tokenization else None
+        if self.text_analyzer is not None:
+            out["textAnalyzer"] = self.text_analyzer.to_dict()
         if self.nested_properties is not None and len(self.nested_properties) > 0:
             out["nestedProperties"] = [np.to_dict() for np in self.nested_properties]
         module_config: Dict[str, Any] = {}
@@ -2161,6 +2181,54 @@ class _ShardStatus:
 ShardStatus = _ShardStatus
 
 
+class _TextAnalyzerConfigCreate(_ConfigCreateModel):
+    """Text analysis options for a property.
+
+    Configures ASCII folding behavior for `text` and `text[]` properties that use an
+    inverted index (searchable or filterable). When enabled, accent/diacritic marks are
+    folded to their base characters during indexing and search (e.g. 'école' matches
+    'ecole'). Both settings are immutable after the property is created.
+
+    Attributes:
+        ascii_fold: If True, accent/diacritic marks are folded to their base characters
+            during indexing and search. If omitted, the field is not sent to the server
+            and the server default (False) applies.
+        ascii_fold_ignore: Optional list of characters that should be excluded from
+            ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). If omitted,
+            the field is not sent to the server. May only be set when `ascii_fold`
+            is True; any other combination is rejected when the model is validated.
+    """
+
+    asciiFold: Optional[bool] = Field(default=None, alias="ascii_fold")
+    asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore")
+
+    @model_validator(mode="after")  # rejects an ignore list unless asciiFold is True
+    def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate":
+        if self.asciiFold is not True and self.asciiFoldIgnore is not None:
+            raise ValueError("asciiFoldIgnore cannot be set when asciiFold is not enabled")
+        return self
+
+
+class _TextAnalyzer:
+    """Factory class for creating text analyzer configurations.
+
+    Use ``Configure.TextAnalyzer`` to access these methods.
+    """
+
+    @staticmethod
+    def ascii_fold(
+        ignore: Optional[List[str]] = None,
+    ) -> _TextAnalyzerConfigCreate:  # always built with ascii_fold=True
+        """Create a text analyzer config with ASCII folding enabled.
+
+        Args:
+            ignore: Optional list of characters that should be excluded from
+                ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to 'e').
+        """
+        return _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=ignore)
+
+
 class Property(_ConfigCreateModel):
     """This class defines the structure of a data property that a collection can have within Weaviate.
 
@@ -2173,6 +2241,9 @@ class Property(_ConfigCreateModel):
         index_searchable: Whether the property should be searchable in the inverted index.
         nested_properties: nested properties for data type OBJECT and OBJECT_ARRAY`.
         skip_vectorization: Whether to skip vectorization of the property. Defaults to `False`.
+        text_analyzer: Text analysis options for the property. Configures ASCII folding
+            behavior for text and text[] properties using an inverted index. Immutable
+            after the property is created.
         tokenization: The tokenization method to use for the inverted index. Defaults to `None`.
         vectorize_property_name: Whether to vectorize the property name. Defaults to `True`.
     """
@@ -2187,6 +2258,7 @@ class Property(_ConfigCreateModel):
         default=None, alias="nested_properties"
     )
     skip_vectorization: bool = Field(default=False)
+    textAnalyzer: Optional[_TextAnalyzerConfigCreate] = Field(default=None, alias="text_analyzer")
     tokenization: Optional[Tokenization] = Field(default=None)
     vectorize_property_name: bool = Field(default=True)
 
@@ -2566,6 +2638,7 @@ class Configure:
     MultiVectors = _MultiVectors
     ObjectTTL = _ObjectTTL
     Replication = _Replication
+    TextAnalyzer = _TextAnalyzer
 
     @staticmethod
     def inverted_index(