Skip to content

Commit ec43d53

Browse files
committed
Merge branch 'feat/stopword-presets' into feat/ascii-fold
2 parents 38c7f44 + 4e0a0f2 commit ec43d53

8 files changed

Lines changed: 694 additions & 35 deletions

File tree

integration/test_collection_config.py

Lines changed: 404 additions & 1 deletion
Large diffs are not rendered by default.

test/collection/test_config.py

Lines changed: 102 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
Property,
1010
Reconfigure,
1111
ReferenceProperty,
12+
StopwordsPreset,
1213
Tokenization,
1314
Vectorizers,
1415
_AsyncReplicationConfig,
@@ -3034,7 +3035,7 @@ def test_property_with_ascii_fold_only(self) -> None:
30343035
prop = Property(
30353036
name="title",
30363037
data_type=DataType.TEXT,
3037-
text_analyzer=Configure.TextAnalyzer.ascii_fold(),
3038+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True),
30383039
)
30393040
assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True}
30403041

@@ -3043,7 +3044,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None:
30433044
name="title",
30443045
data_type=DataType.TEXT,
30453046
tokenization=Tokenization.WORD,
3046-
text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["é", "ñ"]),
3047+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]),
30473048
)
30483049
out = prop._to_dict()
30493050
assert out["textAnalyzer"] == {
@@ -3064,7 +3065,7 @@ def test_nested_property_with_text_analyzer(self) -> None:
30643065
Property(
30653066
name="title",
30663067
data_type=DataType.TEXT,
3067-
text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["ñ"]),
3068+
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]),
30683069
),
30693070
],
30703071
)
@@ -3078,4 +3079,101 @@ def test_text_analyzer_rejects_wrong_types(self) -> None:
30783079
with pytest.raises(ValidationError):
30793080
_TextAnalyzerConfigCreate(ascii_fold="yes") # type: ignore[arg-type]
30803081
with pytest.raises(ValidationError):
3081-
_TextAnalyzerConfigCreate(ascii_fold_ignore="é") # type: ignore[arg-type]
3082+
_TextAnalyzerConfigCreate(ascii_fold_ignore="é")
3083+
3084+
def test_text_analyzer_stopword_preset_builtin_enum(self) -> None:
3085+
prop = Property(
3086+
name="title",
3087+
data_type=DataType.TEXT,
3088+
tokenization=Tokenization.WORD,
3089+
text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN),
3090+
)
3091+
assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"}
3092+
3093+
def test_text_analyzer_stopword_preset_user_defined_string(self) -> None:
3094+
prop = Property(
3095+
name="title_fr",
3096+
data_type=DataType.TEXT,
3097+
tokenization=Tokenization.WORD,
3098+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
3099+
)
3100+
assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"}
3101+
3102+
def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None:
3103+
prop = Property(
3104+
name="title",
3105+
data_type=DataType.TEXT,
3106+
tokenization=Tokenization.WORD,
3107+
text_analyzer=Configure.TextAnalyzer(
3108+
ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr"
3109+
),
3110+
)
3111+
assert prop._to_dict()["textAnalyzer"] == {
3112+
"asciiFold": True,
3113+
"asciiFoldIgnore": ["é"],
3114+
"stopwordPreset": "fr",
3115+
}
3116+
3117+
def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None:
3118+
prop = Property(
3119+
name="title",
3120+
data_type=DataType.TEXT,
3121+
tokenization=Tokenization.WORD,
3122+
text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"),
3123+
)
3124+
out = prop._to_dict()
3125+
assert "asciiFold" not in out["textAnalyzer"]
3126+
assert "asciiFoldIgnore" not in out["textAnalyzer"]
3127+
3128+
3129+
class TestInvertedIndexStopwordPresets:
3130+
def test_configure_inverted_index_with_stopword_presets(self) -> None:
3131+
ic = Configure.inverted_index(
3132+
stopword_presets={
3133+
"fr": ["le", "la", "les"],
3134+
"es": ["el", "la", "los"],
3135+
},
3136+
)
3137+
out = ic._to_dict()
3138+
assert out["stopwordPresets"] == {
3139+
"fr": ["le", "la", "les"],
3140+
"es": ["el", "la", "los"],
3141+
}
3142+
3143+
def test_configure_inverted_index_without_stopword_presets_omits_key(self) -> None:
3144+
ic = Configure.inverted_index()
3145+
assert "stopwordPresets" not in ic._to_dict()
3146+
3147+
def test_reconfigure_inverted_index_merges_stopword_presets(self) -> None:
3148+
rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le", "la"]})
3149+
existing = {
3150+
"stopwords": {"preset": "en", "additions": None, "removals": None},
3151+
"bm25": {"b": 0.75, "k1": 1.2},
3152+
"cleanupIntervalSeconds": 60,
3153+
}
3154+
merged = rc.merge_with_existing(existing)
3155+
assert merged["stopwordPresets"] == {"fr": ["le", "la"]}
3156+
# other fields untouched
3157+
assert merged["stopwords"]["preset"] == "en"
3158+
assert merged["bm25"]["b"] == 0.75
3159+
3160+
def test_reconfigure_inverted_index_replaces_existing_stopword_presets(self) -> None:
3161+
rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le"]})
3162+
existing = {
3163+
"stopwords": {"preset": "en", "additions": None, "removals": None},
3164+
"stopwordPresets": {"fr": ["le", "la", "les"], "es": ["el"]},
3165+
}
3166+
merged = rc.merge_with_existing(existing)
3167+
# The new value fully replaces the prior dict (this matches the server-side
3168+
# PUT semantics — see test_tokenize.py::test_remove_unused_preset_is_allowed).
3169+
assert merged["stopwordPresets"] == {"fr": ["le"]}
3170+
3171+
def test_reconfigure_inverted_index_without_stopword_presets_leaves_existing(self) -> None:
3172+
rc = Reconfigure.inverted_index(bm25_b=0.7, bm25_k1=1.1)
3173+
existing = {
3174+
"stopwords": {"preset": "en", "additions": None, "removals": None},
3175+
"bm25": {"b": 0.75, "k1": 1.2},
3176+
"stopwordPresets": {"fr": ["le", "la"]},
3177+
}
3178+
merged = rc.merge_with_existing(existing)
3179+
assert merged["stopwordPresets"] == {"fr": ["le", "la"]}

test/collection/test_config_methods.py

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from weaviate.collections.classes.config_methods import (
2+
_collection_config_from_json,
23
_collection_configs_simple_from_json,
34
_nested_properties_from_config,
45
_properties_from_config,
@@ -145,3 +146,102 @@ def test_nested_properties_from_config_parses_text_analyzer() -> None:
145146
"asciiFold": True,
146147
"asciiFoldIgnore": ["ñ"],
147148
}
149+
150+
151+
def test_properties_from_config_parses_stopword_preset_only() -> None:
152+
"""A property with only stopwordPreset (no asciiFold) must still produce a text_analyzer."""
153+
schema = {
154+
"vectorizer": "none",
155+
"properties": [
156+
_make_text_prop("title", textAnalyzer={"stopwordPreset": "fr"}),
157+
],
158+
}
159+
title = _properties_from_config(schema)[0]
160+
assert title.text_analyzer is not None
161+
assert title.text_analyzer.ascii_fold is False
162+
assert title.text_analyzer.ascii_fold_ignore is None
163+
assert title.text_analyzer.stopword_preset == "fr"
164+
165+
166+
def test_properties_from_config_parses_combined_text_analyzer() -> None:
167+
schema = {
168+
"vectorizer": "none",
169+
"properties": [
170+
_make_text_prop(
171+
"title",
172+
textAnalyzer={
173+
"asciiFold": True,
174+
"asciiFoldIgnore": ["é"],
175+
"stopwordPreset": "fr",
176+
},
177+
),
178+
],
179+
}
180+
title = _properties_from_config(schema)[0]
181+
assert title.text_analyzer is not None
182+
assert title.text_analyzer.ascii_fold is True
183+
assert title.text_analyzer.ascii_fold_ignore == ["é"]
184+
assert title.text_analyzer.stopword_preset == "fr"
185+
186+
187+
def _full_schema(class_name: str, **inverted_overrides) -> dict:
188+
inverted = {
189+
"bm25": {"b": 0.75, "k1": 1.2},
190+
"cleanupIntervalSeconds": 60,
191+
"stopwords": {"preset": "en", "additions": None, "removals": None},
192+
}
193+
inverted.update(inverted_overrides)
194+
return {
195+
"class": class_name,
196+
"vectorizer": "none",
197+
"properties": [],
198+
"invertedIndexConfig": inverted,
199+
"replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"},
200+
"shardingConfig": {
201+
"virtualPerPhysical": 128,
202+
"desiredCount": 1,
203+
"actualCount": 1,
204+
"desiredVirtualCount": 128,
205+
"actualVirtualCount": 128,
206+
"key": "_id",
207+
"strategy": "hash",
208+
"function": "murmur3",
209+
},
210+
"vectorIndexType": "hnsw",
211+
"vectorIndexConfig": {
212+
"skip": False,
213+
"cleanupIntervalSeconds": 300,
214+
"maxConnections": 64,
215+
"efConstruction": 128,
216+
"ef": -1,
217+
"dynamicEfMin": 100,
218+
"dynamicEfMax": 500,
219+
"dynamicEfFactor": 8,
220+
"vectorCacheMaxObjects": 1000000000000,
221+
"flatSearchCutoff": 40000,
222+
"distance": "cosine",
223+
},
224+
}
225+
226+
227+
def test_collection_config_parses_stopword_presets() -> None:
228+
"""The inverted index config exposes stopwordPresets when present in the schema."""
229+
schema = _full_schema(
230+
"TestStopwordPresets",
231+
stopwordPresets={
232+
"fr": ["le", "la", "les"],
233+
"es": ["el", "la", "los"],
234+
},
235+
)
236+
full = _collection_config_from_json(schema)
237+
assert full.inverted_index_config.stopword_presets == {
238+
"fr": ["le", "la", "les"],
239+
"es": ["el", "la", "los"],
240+
}
241+
242+
243+
def test_collection_config_stopword_presets_absent() -> None:
244+
"""If the server response omits stopwordPresets, the parsed value is None."""
245+
schema = _full_schema("TestNoStopwordPresets")
246+
full = _collection_config_from_json(schema)
247+
assert full.inverted_index_config.stopword_presets is None

0 commit comments

Comments (0)