From 239ed323ccc92dcd1713a5c0b372b7036dd70a79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 14 Apr 2026 21:03:13 +0100
Subject: [PATCH 1/8] feat: add tokenize module with sync and async support,
 including integration tests

---
 integration/test_tokenize.py  | 355 ++++++++++++++++++++++++++++++++++
 weaviate/__init__.py          |   2 +
 weaviate/client.py            |   3 +
 weaviate/client.pyi           |   3 +
 weaviate/tokenize/__init__.py |   7 +
 weaviate/tokenize/async_.py   |   8 +
 weaviate/tokenize/executor.py | 166 ++++++++++++++++
 weaviate/tokenize/sync.py     |   8 +
 weaviate/tokenize/types.py    |  25 +++
 9 files changed, 577 insertions(+)
 create mode 100644 integration/test_tokenize.py
 create mode 100644 weaviate/tokenize/__init__.py
 create mode 100644 weaviate/tokenize/async_.py
 create mode 100644 weaviate/tokenize/executor.py
 create mode 100644 weaviate/tokenize/sync.py
 create mode 100644 weaviate/tokenize/types.py

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
new file mode 100644
index 000000000..e54f9d49d
--- /dev/null
+++ b/integration/test_tokenize.py
@@ -0,0 +1,355 @@
+"""Integration tests for the tokenize module.
+
+These tests cover the client's responsibilities:
+- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, kwargs)
+- Correct deserialization of responses into typed objects
+- Client-side validation (_TextAnalyzerConfigCreate rejects invalid input)
+- Both sync and async client paths
+"""
+
+from typing import AsyncGenerator, Generator
+
+import pytest
+import pytest_asyncio
+
+import weaviate
+from weaviate.collections.classes.config import (
+    StopwordsConfig,
+    StopwordsPreset,
+    TextAnalyzerConfig,
+    Tokenization,
+    _StopwordsCreate,
+    _TextAnalyzerConfigCreate,
+)
+from weaviate.config import AdditionalConfig
+from weaviate.tokenize.types import TokenizeResult
+
+
+@pytest.fixture(scope="module")
+def client() -> Generator[weaviate.WeaviateClient, None, None]:
+    c = weaviate.connect_to_local(
+        additional_config=AdditionalConfig(timeout=(60, 120)),
+    )
+    yield c
+    c.close()
+
+
+@pytest_asyncio.fixture
+async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
+    c = weaviate.use_async_with_local(
+        additional_config=AdditionalConfig(timeout=(60, 120)),
+    )
+    await c.connect()
+    yield c
+    await c.close()
+
+
+# ---------------------------------------------------------------------------
+# Serialization: enums, strings, kwargs, _TextAnalyzerConfigCreate
+# ---------------------------------------------------------------------------
+
+
+class TestSerialization:
+    """Verify the client correctly serializes different input forms."""
+
+    @pytest.mark.parametrize(
+        "tokenization,text,expected_tokens",
+        [
+            (Tokenization.WORD, "The quick brown fox", ["the", "quick", "brown", "fox"]),
+            (Tokenization.LOWERCASE, "Hello World Test", ["hello", "world", "test"]),
+            (Tokenization.WHITESPACE, "Hello World Test", ["Hello", "World", "Test"]),
+            (Tokenization.FIELD, "  Hello World  ", ["Hello World"]),
+            (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"]),
+        ],
+    )
+    def test_tokenization_enum(
+        self,
+        client: weaviate.WeaviateClient,
+        tokenization: Tokenization,
+        text: str,
+        expected_tokens: list,
+    ) -> None:
+        result = client.tokenize.text(text=text, tokenization=tokenization)
+        assert isinstance(result, TokenizeResult)
+        assert result.tokenization == tokenization.value
+        assert result.indexed == expected_tokens
+        assert result.query == expected_tokens
+
+    def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(text="hello world", tokenization="word")
+        assert result.tokenization == "word"
+        assert result.indexed == ["hello", "world"]
+
+    def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="The quick brown fox",
+            tokenization=Tokenization.WORD,
+            stopword_preset=StopwordsPreset.EN,
+        )
+        assert "the" not in result.query
+        assert "quick" in result.query
+
+    def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="The quick brown fox",
+            tokenization=Tokenization.WORD,
+            stopword_preset="en",
+        )
+        assert "the" not in result.query
+
+    def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="L'école est fermée",
+            tokenization=Tokenization.WORD,
+            ascii_fold=True,
+        )
+        assert result.indexed == ["l", "ecole", "est", "fermee"]
+
+    def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
+        result = client.tokenize.text(
+            text="L'école est fermée",
+            tokenization=Tokenization.WORD,
+            analyzer_config=cfg,
+        )
+        assert result.indexed == ["l", "ecole", "est", "fermee"]
+
+    def test_analyzer_config_and_kwargs_produce_same_result(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """analyzer_config object and equivalent kwargs must produce identical output."""
+        cfg = _TextAnalyzerConfigCreate(
+            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
+        )
+        via_config = client.tokenize.text(
+            text="L'école est fermée",
+            tokenization=Tokenization.WORD,
+            analyzer_config=cfg,
+        )
+        via_kwargs = client.tokenize.text(
+            text="L'école est fermée",
+            tokenization=Tokenization.WORD,
+            ascii_fold=True,
+            ascii_fold_ignore=["é"],
+            stopword_preset=StopwordsPreset.EN,
+        )
+        assert via_config.indexed == via_kwargs.indexed
+        assert via_config.query == via_kwargs.query
+
+    def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="hello world test",
+            tokenization=Tokenization.WORD,
+            stopword_preset="custom",
+            stopword_presets={
+                "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None),
+            },
+        )
+        assert result.indexed == ["hello", "world", "test"]
+        assert result.query == ["hello", "world"]
+
+    def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="the quick",
+            tokenization=Tokenization.WORD,
+            stopword_preset="en-no-the",
+            stopword_presets={
+                "en-no-the": _StopwordsCreate(
+                    preset=StopwordsPreset.EN, additions=None, removals=["the"]
+                ),
+            },
+        )
+        assert result.indexed == ["the", "quick"]
+        assert result.query == ["the", "quick"]
+
+
+# ---------------------------------------------------------------------------
+# Deserialization: typed response fields
+# ---------------------------------------------------------------------------
+
+
+class TestDeserialization:
+    """Verify the client correctly deserializes response fields into typed objects."""
+
+    def test_result_type(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD)
+        assert isinstance(result, TokenizeResult)
+        assert isinstance(result.indexed, list)
+        assert isinstance(result.query, list)
+
+    def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(
+            text="L'école",
+            tokenization=Tokenization.WORD,
+            ascii_fold=True,
+            ascii_fold_ignore=["é"],
+            stopword_preset=StopwordsPreset.EN,
+        )
+        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
+        assert result.analyzer_config.ascii_fold is True
+        assert result.analyzer_config.ascii_fold_ignore == ["é"]
+        assert result.analyzer_config.stopword_preset == "en"
+
+    def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD)
+        assert result.analyzer_config is None
+
+    def test_stopword_config_deserialized_on_property(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """Property endpoint returns stopwordConfig; verify it deserializes to StopwordsConfig."""
+        client.collections.delete("TestDeserStopword")
+        try:
+            client.collections.create_from_dict(
+                {
+                    "class": "TestDeserStopword",
+                    "vectorizer": "none",
+                    "properties": [
+                        {
+                            "name": "title",
+                            "dataType": ["text"],
+                            "tokenization": "word",
+                            "textAnalyzer": {"stopwordPreset": "en"},
+                        },
+                    ],
+                }
+            )
+            result = client.tokenize.property(
+                collection_name="TestDeserStopword",
+                property_name="title",
+                text="the quick",
+            )
+            assert isinstance(result, TokenizeResult)
+            assert result.tokenization == "word"
+            # Stopword config should be deserialized when present
+            if result.stopword_config is not None:
+                assert isinstance(result.stopword_config, StopwordsConfig)
+        finally:
+            client.collections.delete("TestDeserStopword")
+
+    def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
+        client.collections.delete("TestDeserPropTypes")
+        try:
+            client.collections.create_from_dict(
+                {
+                    "class": "TestDeserPropTypes",
+                    "vectorizer": "none",
+                    "properties": [
+                        {
+                            "name": "tag",
+                            "dataType": ["text"],
+                            "tokenization": "field",
+                        },
+                    ],
+                }
+            )
+            result = client.tokenize.property(
+                collection_name="TestDeserPropTypes",
+                property_name="tag",
+                text="  Hello World  ",
+            )
+            assert isinstance(result, TokenizeResult)
+            assert result.tokenization == "field"
+            assert result.indexed == ["Hello World"]
+        finally:
+            client.collections.delete("TestDeserPropTypes")
+
+
+# ---------------------------------------------------------------------------
+# Client-side validation (_TextAnalyzerConfigCreate)
+# ---------------------------------------------------------------------------
+
+
+class TestClientSideValidation:
+    """Verify that _TextAnalyzerConfigCreate rejects invalid input before hitting the server."""
+
+    def test_ascii_fold_ignore_without_fold_raises(self) -> None:
+        with pytest.raises(ValueError, match="asciiFoldIgnore"):
+            _TextAnalyzerConfigCreate(ascii_fold=False, ascii_fold_ignore=["é"])
+
+    def test_ascii_fold_ignore_without_fold_default_raises(self) -> None:
+        with pytest.raises(ValueError, match="asciiFoldIgnore"):
+            _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"])
+
+    def test_valid_config_does_not_raise(self) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é", "ñ"])
+        assert cfg.asciiFold is True
+        assert cfg.asciiFoldIgnore == ["é", "ñ"]
+
+    def test_fold_without_ignore_is_valid(self) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
+        assert cfg.asciiFold is True
+        assert cfg.asciiFoldIgnore is None
+
+    def test_stopword_preset_only_is_valid(self) -> None:
+        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
+        assert cfg.stopwordPreset == "en"
+
+    def test_empty_config_is_valid(self) -> None:
+        cfg = _TextAnalyzerConfigCreate()
+        assert cfg.asciiFold is None
+        assert cfg.asciiFoldIgnore is None
+        assert cfg.stopwordPreset is None
+
+
+# ---------------------------------------------------------------------------
+# Async client
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncClient:
+    """Verify both text() and property() work through the async client."""
+
+    @pytest.mark.asyncio
+    async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
+        result = await async_client.tokenize.text(
+            text="The quick brown fox",
+            tokenization=Tokenization.WORD,
+        )
+        assert isinstance(result, TokenizeResult)
+        assert result.indexed == ["the", "quick", "brown", "fox"]
+
+    @pytest.mark.asyncio
+    async def test_text_with_analyzer_config(
+        self, async_client: weaviate.WeaviateAsyncClient
+    ) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN)
+        result = await async_client.tokenize.text(
+            text="L'école est fermée",
+            tokenization=Tokenization.WORD,
+            analyzer_config=cfg,
+        )
+        assert result.indexed == ["l", "ecole", "est", "fermee"]
+        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
+        assert result.analyzer_config.ascii_fold is True
+
+    @pytest.mark.asyncio
+    async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
+        await async_client.collections.delete("TestAsyncPropTokenize")
+        try:
+            await async_client.collections.create_from_dict(
+                {
+                    "class": "TestAsyncPropTokenize",
+                    "vectorizer": "none",
+                    "properties": [
+                        {
+                            "name": "title",
+                            "dataType": ["text"],
+                            "tokenization": "word",
+                            "textAnalyzer": {"stopwordPreset": "en"},
+                        },
+                    ],
+                }
+            )
+            result = await async_client.tokenize.property(
+                collection_name="TestAsyncPropTokenize",
+                property_name="title",
+                text="The quick brown fox",
+            )
+            assert isinstance(result, TokenizeResult)
+            assert result.tokenization == "word"
+            assert result.indexed == ["the", "quick", "brown", "fox"]
+            assert "the" not in result.query
+            assert "quick" in result.query
+        finally:
+            await async_client.collections.delete("TestAsyncPropTokenize")
diff --git a/weaviate/__init__.py b/weaviate/__init__.py
index 562b142bc..6fd9368ea 100644
--- a/weaviate/__init__.py
+++ b/weaviate/__init__.py
@@ -21,6 +21,7 @@
     embedded,
     exceptions,
     outputs,
+    tokenize,
     types,
 )
 from .client import Client, WeaviateAsyncClient, WeaviateClient
@@ -67,6 +68,7 @@
     "embedded",
     "exceptions",
     "outputs",
+    "tokenize",
     "types",
     "use_async_with_custom",
     "use_async_with_embedded",
diff --git a/weaviate/client.py b/weaviate/client.py
index d7f9080f4..cbd12be9a 100644
--- a/weaviate/client.py
+++ b/weaviate/client.py
@@ -22,6 +22,7 @@
 from .embedded import EmbeddedOptions
 from .groups import _Groups, _GroupsAsync
 from .rbac import _Roles, _RolesAsync
+from .tokenize import _Tokenize, _TokenizeAsync
 from .types import NUMBER
 from .users import _Users, _UsersAsync
 
@@ -82,6 +83,7 @@ def __init__(
         self.debug = _DebugAsync(self._connection)
         self.groups = _GroupsAsync(self._connection)
         self.roles = _RolesAsync(self._connection)
+        self.tokenize = _TokenizeAsync(self._connection)
         self.users = _UsersAsync(self._connection)
 
     async def __aenter__(self) -> "WeaviateAsyncClient":
@@ -157,6 +159,7 @@ def __init__(
         self.debug = _Debug(self._connection)
         self.groups = _Groups(self._connection)
         self.roles = _Roles(self._connection)
+        self.tokenize = _Tokenize(self._connection)
         self.users = _Users(self._connection)
 
     def __enter__(self) -> "WeaviateClient":
diff --git a/weaviate/client.pyi b/weaviate/client.pyi
index 9b32af15f..a6a44f8f7 100644
--- a/weaviate/client.pyi
+++ b/weaviate/client.pyi
@@ -21,6 +21,7 @@ from .cluster import _Cluster, _ClusterAsync
 from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync
 from .debug import _Debug, _DebugAsync
 from .rbac import _Roles, _RolesAsync
+from .tokenize import _Tokenize, _TokenizeAsync
 from .types import NUMBER
 
 TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER]
@@ -35,6 +36,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]):
     debug: _DebugAsync
     groups: _GroupsAsync
     roles: _RolesAsync
+    tokenize: _TokenizeAsync
     users: _UsersAsync
 
     async def close(self) -> None: ...
@@ -58,6 +60,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]):
     debug: _Debug
     groups: _Groups
     roles: _Roles
+    tokenize: _Tokenize
     users: _Users
 
     def close(self) -> None: ...
diff --git a/weaviate/tokenize/__init__.py b/weaviate/tokenize/__init__.py
new file mode 100644
index 000000000..d0c2883c5
--- /dev/null
+++ b/weaviate/tokenize/__init__.py
@@ -0,0 +1,7 @@
+"""Module for tokenize operations."""
+
+from .async_ import _TokenizeAsync
+from .sync import _Tokenize
+from .types import TokenizeResult
+
+__all__ = ["_Tokenize", "_TokenizeAsync", "TokenizeResult"]
diff --git a/weaviate/tokenize/async_.py b/weaviate/tokenize/async_.py
new file mode 100644
index 000000000..a59c392ea
--- /dev/null
+++ b/weaviate/tokenize/async_.py
@@ -0,0 +1,8 @@
+from weaviate.connect import executor
+from weaviate.connect.v4 import ConnectionAsync
+from weaviate.tokenize.executor import _TokenizeExecutor
+
+
+@executor.wrap("async")
+class _TokenizeAsync(_TokenizeExecutor[ConnectionAsync]):
+    pass
diff --git a/weaviate/tokenize/executor.py b/weaviate/tokenize/executor.py
new file mode 100644
index 000000000..bd2c24dc1
--- /dev/null
+++ b/weaviate/tokenize/executor.py
@@ -0,0 +1,166 @@
+"""Tokenize executor."""
+
+from typing import Any, Dict, Generic, List, Optional, Union
+
+from httpx import Response
+
+from weaviate.collections.classes.config import (
+    StopwordsConfig,
+    StopwordsPreset,
+    TextAnalyzerConfig,
+    Tokenization,
+    _StopwordsCreate,
+    _TextAnalyzerConfigCreate,
+)
+from weaviate.connect import executor
+from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
+from weaviate.tokenize.types import TokenizeResult
+
+
+def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]:
+    ac = body.get("analyzerConfig")
+    if ac is None:
+        return None
+    if "asciiFold" not in ac and "stopwordPreset" not in ac:
+        return None
+    return TextAnalyzerConfig(
+        ascii_fold=ac.get("asciiFold", False),
+        ascii_fold_ignore=ac.get("asciiFoldIgnore"),
+        stopword_preset=ac.get("stopwordPreset"),
+    )
+
+
+def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]:
+    sc = body.get("stopwordConfig")
+    if sc is None:
+        return None
+    return StopwordsConfig(
+        preset=StopwordsPreset(sc["preset"]) if sc.get("preset") else StopwordsPreset.NONE,
+        additions=sc.get("additions"),
+        removals=sc.get("removals"),
+    )
+
+
+def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult:
+    return TokenizeResult(
+        tokenization=body["tokenization"],
+        indexed=body["indexed"],
+        query=body["query"],
+        analyzer_config=_parse_analyzer_config(body),
+        stopword_config=_parse_stopword_config(body),
+    )
+
+
+class _TokenizeExecutor(Generic[ConnectionType]):
+    def __init__(self, connection: ConnectionType):
+        self._connection = connection
+
+    def text(
+        self,
+        text: str,
+        tokenization: Union[Tokenization, str],
+        *,
+        analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
+        ascii_fold: Optional[bool] = None,
+        ascii_fold_ignore: Optional[List[str]] = None,
+        stopword_preset: Optional[Union[StopwordsPreset, str]] = None,
+        stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None,
+    ) -> executor.Result[TokenizeResult]:
+        """Tokenize text using the generic /v1/tokenize endpoint.
+
+        Analyzer settings can be provided either via a ``_TextAnalyzerConfigCreate``
+        object **or** via the individual keyword arguments (``ascii_fold``,
+        ``ascii_fold_ignore``, ``stopword_preset``).  If ``analyzer_config`` is
+        given the individual keyword arguments are ignored.
+
+        Args:
+            text: The text to tokenize.
+            tokenization: The tokenization method to use (e.g. Tokenization.WORD).
+            analyzer_config: A ``_TextAnalyzerConfigCreate`` instance that bundles
+                ascii_fold, ascii_fold_ignore, and stopword_preset settings.
+            ascii_fold: Whether to fold accented characters to ASCII equivalents.
+            ascii_fold_ignore: Characters to exclude from ASCII folding.
+            stopword_preset: Stopword preset name to apply for query-time filtering.
+            stopword_presets: Custom stopword preset definitions, keyed by name.
+                Each value is a ``_StopwordsCreate`` with optional preset, additions,
+                and removals fields.
+
+        Returns:
+            A TokenizeResult with indexed and query token lists.
+        """
+        tokenization_str = (
+            tokenization.value if isinstance(tokenization, Tokenization) else tokenization
+        )
+
+        payload: Dict[str, Any] = {
+            "text": text,
+            "tokenization": tokenization_str,
+        }
+
+        if analyzer_config is not None:
+            ac_dict = analyzer_config._to_dict()
+            if ac_dict:
+                payload["analyzerConfig"] = ac_dict
+        else:
+            ac: Dict[str, Any] = {}
+            if ascii_fold is not None:
+                ac["asciiFold"] = ascii_fold
+            if ascii_fold_ignore is not None:
+                ac["asciiFoldIgnore"] = ascii_fold_ignore
+            if stopword_preset is not None:
+                ac["stopwordPreset"] = (
+                    stopword_preset.value
+                    if isinstance(stopword_preset, StopwordsPreset)
+                    else stopword_preset
+                )
+            if ac:
+                payload["analyzerConfig"] = ac
+
+        if stopword_presets is not None:
+            payload["stopwordPresets"] = {
+                name: cfg._to_dict() for name, cfg in stopword_presets.items()
+            }
+
+        def resp(response: Response) -> TokenizeResult:
+            return _parse_tokenize_result(response.json())
+
+        return executor.execute(
+            response_callback=resp,
+            method=self._connection.post,
+            path="/tokenize",
+            weaviate_object=payload,
+            error_msg="Tokenization failed",
+            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
+        )
+
+    def property(
+        self,
+        collection_name: str,
+        property_name: str,
+        text: str,
+    ) -> executor.Result[TokenizeResult]:
+        """Tokenize text using a property's configured tokenization settings.
+
+        Args:
+            collection_name: The collection (class) name.
+            property_name: The property name whose tokenization config to use.
+            text: The text to tokenize.
+
+        Returns:
+            A TokenizeResult with indexed and query token lists.
+        """
+        path = f"/schema/{collection_name}/properties/{property_name}/tokenize"
+
+        payload: Dict[str, Any] = {"text": text}
+
+        def resp(response: Response) -> TokenizeResult:
+            return _parse_tokenize_result(response.json())
+
+        return executor.execute(
+            response_callback=resp,
+            method=self._connection.post,
+            path=path,
+            weaviate_object=payload,
+            error_msg="Property tokenization failed",
+            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
+        )
diff --git a/weaviate/tokenize/sync.py b/weaviate/tokenize/sync.py
new file mode 100644
index 000000000..755c42559
--- /dev/null
+++ b/weaviate/tokenize/sync.py
@@ -0,0 +1,8 @@
+from weaviate.connect import executor
+from weaviate.connect.v4 import ConnectionSync
+from weaviate.tokenize.executor import _TokenizeExecutor
+
+
+@executor.wrap("sync")
+class _Tokenize(_TokenizeExecutor[ConnectionSync]):
+    pass
diff --git a/weaviate/tokenize/types.py b/weaviate/tokenize/types.py
new file mode 100644
index 000000000..ba4009b2d
--- /dev/null
+++ b/weaviate/tokenize/types.py
@@ -0,0 +1,25 @@
+"""Return types for tokenize operations."""
+
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig
+
+
+@dataclass
+class TokenizeResult:
+    """Result of a tokenization operation.
+
+    Attributes:
+        tokenization: The tokenization method that was applied.
+        indexed: Tokens as they would be stored in the inverted index.
+        query: Tokens as they would be used for querying (after stopword removal).
+        analyzer_config: The text analyzer configuration that was used, if any.
+        stopword_config: The stopword configuration that was used, if any.
+    """
+
+    tokenization: str
+    indexed: List[str]
+    query: List[str]
+    analyzer_config: Optional[TextAnalyzerConfig] = field(default=None)
+    stopword_config: Optional[StopwordsConfig] = field(default=None)

From 8b2caaf7356223370b3eba7665d7c5e59c685be1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 10:39:17 +0100
Subject: [PATCH 2/8] refactor: rename tokenize -> tokenization, types ->
 models, property() -> for_property() so names don't shadow existing ones

---
 integration/test_tokenize.py                  | 38 +++++++++----------
 weaviate/__init__.py                          |  4 +-
 weaviate/client.py                            |  6 +--
 weaviate/client.pyi                           |  6 +--
 weaviate/tokenization/__init__.py             |  7 ++++
 weaviate/tokenization/async_.py               |  8 ++++
 .../{tokenize => tokenization}/executor.py    |  6 +--
 .../types.py => tokenization/models.py}       |  0
 weaviate/tokenization/sync.py                 |  8 ++++
 weaviate/tokenize/__init__.py                 |  7 ----
 weaviate/tokenize/async_.py                   |  8 ----
 weaviate/tokenize/sync.py                     |  8 ----
 12 files changed, 53 insertions(+), 53 deletions(-)
 create mode 100644 weaviate/tokenization/__init__.py
 create mode 100644 weaviate/tokenization/async_.py
 rename weaviate/{tokenize => tokenization}/executor.py (97%)
 rename weaviate/{tokenize/types.py => tokenization/models.py} (100%)
 create mode 100644 weaviate/tokenization/sync.py
 delete mode 100644 weaviate/tokenize/__init__.py
 delete mode 100644 weaviate/tokenize/async_.py
 delete mode 100644 weaviate/tokenize/sync.py

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index e54f9d49d..b3ecff875 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -22,7 +22,7 @@
     _TextAnalyzerConfigCreate,
 )
 from weaviate.config import AdditionalConfig
-from weaviate.tokenize.types import TokenizeResult
+from weaviate.tokenization.models import TokenizeResult
 
 
 @pytest.fixture(scope="module")
@@ -69,19 +69,19 @@ def test_tokenization_enum(
         text: str,
         expected_tokens: list,
     ) -> None:
-        result = client.tokenize.text(text=text, tokenization=tokenization)
+        result = client.tokenization.text(text=text, tokenization=tokenization)
         assert isinstance(result, TokenizeResult)
         assert result.tokenization == tokenization.value
         assert result.indexed == expected_tokens
         assert result.query == expected_tokens
 
     def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(text="hello world", tokenization="word")
+        result = client.tokenization.text(text="hello world", tokenization="word")
         assert result.tokenization == "word"
         assert result.indexed == ["hello", "world"]
 
     def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="The quick brown fox",
             tokenization=Tokenization.WORD,
             stopword_preset=StopwordsPreset.EN,
@@ -90,7 +90,7 @@ def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
         assert "quick" in result.query
 
     def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="The quick brown fox",
             tokenization=Tokenization.WORD,
             stopword_preset="en",
@@ -98,7 +98,7 @@ def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
         assert "the" not in result.query
 
     def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="L'école est fermée",
             tokenization=Tokenization.WORD,
             ascii_fold=True,
@@ -107,7 +107,7 @@ def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None:
 
     def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
         cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="L'école est fermée",
             tokenization=Tokenization.WORD,
             analyzer_config=cfg,
@@ -121,12 +121,12 @@ def test_analyzer_config_and_kwargs_produce_same_result(
         cfg = _TextAnalyzerConfigCreate(
             ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
         )
-        via_config = client.tokenize.text(
+        via_config = client.tokenization.text(
             text="L'école est fermée",
             tokenization=Tokenization.WORD,
             analyzer_config=cfg,
         )
-        via_kwargs = client.tokenize.text(
+        via_kwargs = client.tokenization.text(
             text="L'école est fermée",
             tokenization=Tokenization.WORD,
             ascii_fold=True,
@@ -137,7 +137,7 @@ def test_analyzer_config_and_kwargs_produce_same_result(
         assert via_config.query == via_kwargs.query
 
     def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="hello world test",
             tokenization=Tokenization.WORD,
             stopword_preset="custom",
@@ -149,7 +149,7 @@ def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -
         assert result.query == ["hello", "world"]
 
     def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="the quick",
             tokenization=Tokenization.WORD,
             stopword_preset="en-no-the",
@@ -172,13 +172,13 @@ class TestDeserialization:
     """Verify the client correctly deserializes response fields into typed objects."""
 
     def test_result_type(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD)
+        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
         assert isinstance(result, TokenizeResult)
         assert isinstance(result.indexed, list)
         assert isinstance(result.query, list)
 
     def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(
+        result = client.tokenization.text(
             text="L'école",
             tokenization=Tokenization.WORD,
             ascii_fold=True,
@@ -191,7 +191,7 @@ def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) ->
         assert result.analyzer_config.stopword_preset == "en"
 
     def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD)
+        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
         assert result.analyzer_config is None
 
     def test_stopword_config_deserialized_on_property(
@@ -214,7 +214,7 @@ def test_stopword_config_deserialized_on_property(
                     ],
                 }
             )
-            result = client.tokenize.property(
+            result = client.tokenization.for_property(
                 collection_name="TestDeserStopword",
                 property_name="title",
                 text="the quick",
@@ -243,7 +243,7 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
                     ],
                 }
             )
-            result = client.tokenize.property(
+            result = client.tokenization.for_property(
                 collection_name="TestDeserPropTypes",
                 property_name="tag",
                 text="  Hello World  ",
@@ -302,7 +302,7 @@ class TestAsyncClient:
 
     @pytest.mark.asyncio
     async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
-        result = await async_client.tokenize.text(
+        result = await async_client.tokenization.text(
             text="The quick brown fox",
             tokenization=Tokenization.WORD,
         )
@@ -314,7 +314,7 @@ async def test_text_with_analyzer_config(
         self, async_client: weaviate.WeaviateAsyncClient
     ) -> None:
         cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN)
-        result = await async_client.tokenize.text(
+        result = await async_client.tokenization.text(
             text="L'école est fermée",
             tokenization=Tokenization.WORD,
             analyzer_config=cfg,
@@ -341,7 +341,7 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
                     ],
                 }
             )
-            result = await async_client.tokenize.property(
+            result = await async_client.tokenization.for_property(
                 collection_name="TestAsyncPropTokenize",
                 property_name="title",
                 text="The quick brown fox",
diff --git a/weaviate/__init__.py b/weaviate/__init__.py
index 6fd9368ea..2e7e5e58b 100644
--- a/weaviate/__init__.py
+++ b/weaviate/__init__.py
@@ -21,7 +21,7 @@
     embedded,
     exceptions,
     outputs,
-    tokenize,
+    tokenization,
     types,
 )
 from .client import Client, WeaviateAsyncClient, WeaviateClient
@@ -68,7 +68,7 @@
     "embedded",
     "exceptions",
     "outputs",
-    "tokenize",
+    "tokenization",
     "types",
     "use_async_with_custom",
     "use_async_with_embedded",
diff --git a/weaviate/client.py b/weaviate/client.py
index cbd12be9a..10ce4c77b 100644
--- a/weaviate/client.py
+++ b/weaviate/client.py
@@ -22,7 +22,7 @@
 from .embedded import EmbeddedOptions
 from .groups import _Groups, _GroupsAsync
 from .rbac import _Roles, _RolesAsync
-from .tokenize import _Tokenize, _TokenizeAsync
+from .tokenization import _Tokenization, _TokenizationAsync
 from .types import NUMBER
 from .users import _Users, _UsersAsync
 
@@ -83,7 +83,7 @@ def __init__(
         self.debug = _DebugAsync(self._connection)
         self.groups = _GroupsAsync(self._connection)
         self.roles = _RolesAsync(self._connection)
-        self.tokenize = _TokenizeAsync(self._connection)
+        self.tokenization = _TokenizationAsync(self._connection)
         self.users = _UsersAsync(self._connection)
 
     async def __aenter__(self) -> "WeaviateAsyncClient":
@@ -159,7 +159,7 @@ def __init__(
         self.debug = _Debug(self._connection)
         self.groups = _Groups(self._connection)
         self.roles = _Roles(self._connection)
-        self.tokenize = _Tokenize(self._connection)
+        self.tokenization = _Tokenization(self._connection)
         self.users = _Users(self._connection)
 
     def __enter__(self) -> "WeaviateClient":
diff --git a/weaviate/client.pyi b/weaviate/client.pyi
index a6a44f8f7..8fafdc3d1 100644
--- a/weaviate/client.pyi
+++ b/weaviate/client.pyi
@@ -21,7 +21,7 @@ from .cluster import _Cluster, _ClusterAsync
 from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync
 from .debug import _Debug, _DebugAsync
 from .rbac import _Roles, _RolesAsync
-from .tokenize import _Tokenize, _TokenizeAsync
+from .tokenization import _Tokenization, _TokenizationAsync
 from .types import NUMBER
 
 TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER]
@@ -36,7 +36,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]):
     debug: _DebugAsync
     groups: _GroupsAsync
     roles: _RolesAsync
-    tokenize: _TokenizeAsync
+    tokenization: _TokenizationAsync
     users: _UsersAsync
 
     async def close(self) -> None: ...
@@ -60,7 +60,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]):
     debug: _Debug
     groups: _Groups
     roles: _Roles
-    tokenize: _Tokenize
+    tokenization: _Tokenization
     users: _Users
 
     def close(self) -> None: ...
diff --git a/weaviate/tokenization/__init__.py b/weaviate/tokenization/__init__.py
new file mode 100644
index 000000000..2437f7745
--- /dev/null
+++ b/weaviate/tokenization/__init__.py
@@ -0,0 +1,7 @@
+"""Module for tokenization operations."""
+
+from .async_ import _TokenizationAsync
+from .sync import _Tokenization
+from .models import TokenizeResult
+
+__all__ = ["_Tokenization", "_TokenizationAsync", "TokenizeResult"]
diff --git a/weaviate/tokenization/async_.py b/weaviate/tokenization/async_.py
new file mode 100644
index 000000000..5406a39dd
--- /dev/null
+++ b/weaviate/tokenization/async_.py
@@ -0,0 +1,8 @@
+from weaviate.connect import executor
+from weaviate.connect.v4 import ConnectionAsync
+from weaviate.tokenization.executor import _TokenizationExecutor
+
+
+@executor.wrap("async")
+class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
+    pass
diff --git a/weaviate/tokenize/executor.py b/weaviate/tokenization/executor.py
similarity index 97%
rename from weaviate/tokenize/executor.py
rename to weaviate/tokenization/executor.py
index bd2c24dc1..9ddf5f7ed 100644
--- a/weaviate/tokenize/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -14,7 +14,7 @@
 )
 from weaviate.connect import executor
 from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
-from weaviate.tokenize.types import TokenizeResult
+from weaviate.tokenization.models import TokenizeResult
 
 
 def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]:
@@ -51,7 +51,7 @@ def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult:
     )
 
 
-class _TokenizeExecutor(Generic[ConnectionType]):
+class _TokenizationExecutor(Generic[ConnectionType]):
     def __init__(self, connection: ConnectionType):
         self._connection = connection
 
@@ -133,7 +133,7 @@ def resp(response: Response) -> TokenizeResult:
             status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
         )
 
-    def property(
+    def for_property(
         self,
         collection_name: str,
         property_name: str,
diff --git a/weaviate/tokenize/types.py b/weaviate/tokenization/models.py
similarity index 100%
rename from weaviate/tokenize/types.py
rename to weaviate/tokenization/models.py
diff --git a/weaviate/tokenization/sync.py b/weaviate/tokenization/sync.py
new file mode 100644
index 000000000..ab28cc98e
--- /dev/null
+++ b/weaviate/tokenization/sync.py
@@ -0,0 +1,8 @@
+from weaviate.connect import executor
+from weaviate.connect.v4 import ConnectionSync
+from weaviate.tokenization.executor import _TokenizationExecutor
+
+
+@executor.wrap("sync")
+class _Tokenization(_TokenizationExecutor[ConnectionSync]):
+    pass
diff --git a/weaviate/tokenize/__init__.py b/weaviate/tokenize/__init__.py
deleted file mode 100644
index d0c2883c5..000000000
--- a/weaviate/tokenize/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""Module for tokenize operations."""
-
-from .async_ import _TokenizeAsync
-from .sync import _Tokenize
-from .types import TokenizeResult
-
-__all__ = ["_Tokenize", "_TokenizeAsync", "TokenizeResult"]
diff --git a/weaviate/tokenize/async_.py b/weaviate/tokenize/async_.py
deleted file mode 100644
index a59c392ea..000000000
--- a/weaviate/tokenize/async_.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from weaviate.connect import executor
-from weaviate.connect.v4 import ConnectionAsync
-from weaviate.tokenize.executor import _TokenizeExecutor
-
-
-@executor.wrap("async")
-class _TokenizeAsync(_TokenizeExecutor[ConnectionAsync]):
-    pass
diff --git a/weaviate/tokenize/sync.py b/weaviate/tokenize/sync.py
deleted file mode 100644
index 755c42559..000000000
--- a/weaviate/tokenize/sync.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from weaviate.connect import executor
-from weaviate.connect.v4 import ConnectionSync
-from weaviate.tokenize.executor import _TokenizeExecutor
-
-
-@executor.wrap("sync")
-class _Tokenize(_TokenizeExecutor[ConnectionSync]):
-    pass

From ede0b96477aa44f5de9fb964e8a48a84e126b408 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 10:47:01 +0100
Subject: [PATCH 3/8] fix: gate tokenization methods on Weaviate >= 1.37.0

---
 integration/test_tokenize.py      | 24 ++++++++++++++++++++++++
 weaviate/tokenization/executor.py | 17 +++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index b3ecff875..47321aaf5 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -22,6 +22,7 @@
     _TextAnalyzerConfigCreate,
 )
 from weaviate.config import AdditionalConfig
+from weaviate.exceptions import WeaviateUnsupportedFeatureError
 from weaviate.tokenization.models import TokenizeResult
 
 
@@ -292,6 +293,29 @@ def test_empty_config_is_valid(self) -> None:
         assert cfg.stopwordPreset is None
 
 
+# ---------------------------------------------------------------------------
+# Version gate
+# ---------------------------------------------------------------------------
+
+
+class TestVersionGate:
+    """On Weaviate < 1.37 the client must raise before sending the request."""
+
+    def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
+        if client._connection._weaviate_version.is_at_least(1, 37, 0):
+            pytest.skip("Version gate only applies to Weaviate < 1.37.0")
+        with pytest.raises(WeaviateUnsupportedFeatureError):
+            client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
+
+    def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
+        if client._connection._weaviate_version.is_at_least(1, 37, 0):
+            pytest.skip("Version gate only applies to Weaviate < 1.37.0")
+        with pytest.raises(WeaviateUnsupportedFeatureError):
+            client.tokenization.for_property(
+                collection_name="Any", property_name="title", text="hello"
+            )
+
+
 # ---------------------------------------------------------------------------
 # Async client
 # ---------------------------------------------------------------------------
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 9ddf5f7ed..de3f68061 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -14,6 +14,7 @@
 )
 from weaviate.connect import executor
 from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
+from weaviate.exceptions import WeaviateUnsupportedFeatureError
 from weaviate.tokenization.models import TokenizeResult
 
 
@@ -55,6 +56,14 @@ class _TokenizationExecutor(Generic[ConnectionType]):
     def __init__(self, connection: ConnectionType):
         self._connection = connection
 
+    def _check_version(self) -> None:
+        if self._connection._weaviate_version.is_lower_than(1, 37, 0):
+            raise WeaviateUnsupportedFeatureError(
+                "Tokenization",
+                str(self._connection._weaviate_version),
+                "1.37.0",
+            )
+
     def text(
         self,
         text: str,
@@ -87,7 +96,11 @@ def text(
 
         Returns:
             A TokenizeResult with indexed and query token lists.
+
+        Raises:
+            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
         """
+        self._check_version()
         tokenization_str = (
             tokenization.value if isinstance(tokenization, Tokenization) else tokenization
         )
@@ -148,7 +161,11 @@ def for_property(
 
         Returns:
             A TokenizeResult with indexed and query token lists.
+
+        Raises:
+            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
         """
+        self._check_version()
         path = f"/schema/{collection_name}/properties/{property_name}/tokenize"
 
         payload: Dict[str, Any] = {"text": text}

From 8d379f4142222f7483d29042a9ec75f92c2e4cc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 10:50:22 +0100
Subject: [PATCH 4/8] refactor: update tokenization type to use Tokenization
 enum in TokenizeResult and related tests

---
 integration/test_tokenize.py      | 10 +++++-----
 weaviate/tokenization/executor.py |  2 +-
 weaviate/tokenization/models.py   |  4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 47321aaf5..24b515f2c 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -72,13 +72,13 @@ def test_tokenization_enum(
     ) -> None:
         result = client.tokenization.text(text=text, tokenization=tokenization)
         assert isinstance(result, TokenizeResult)
-        assert result.tokenization == tokenization.value
+        assert result.tokenization == tokenization
         assert result.indexed == expected_tokens
         assert result.query == expected_tokens
 
     def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None:
         result = client.tokenization.text(text="hello world", tokenization="word")
-        assert result.tokenization == "word"
+        assert result.tokenization == Tokenization.WORD
         assert result.indexed == ["hello", "world"]
 
     def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
@@ -221,7 +221,7 @@ def test_stopword_config_deserialized_on_property(
                 text="the quick",
             )
             assert isinstance(result, TokenizeResult)
-            assert result.tokenization == "word"
+            assert result.tokenization == Tokenization.WORD
             # Stopword config should be deserialized when present
             if result.stopword_config is not None:
                 assert isinstance(result.stopword_config, StopwordsConfig)
@@ -250,7 +250,7 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
                 text="  Hello World  ",
             )
             assert isinstance(result, TokenizeResult)
-            assert result.tokenization == "field"
+            assert result.tokenization == Tokenization.FIELD
             assert result.indexed == ["Hello World"]
         finally:
             client.collections.delete("TestDeserPropTypes")
@@ -371,7 +371,7 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
                 text="The quick brown fox",
             )
             assert isinstance(result, TokenizeResult)
-            assert result.tokenization == "word"
+            assert result.tokenization == Tokenization.WORD
             assert result.indexed == ["the", "quick", "brown", "fox"]
             assert "the" not in result.query
             assert "quick" in result.query
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index de3f68061..6228d7350 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -44,7 +44,7 @@ def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]:
 
 def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult:
     return TokenizeResult(
-        tokenization=body["tokenization"],
+        tokenization=Tokenization(body["tokenization"]),
         indexed=body["indexed"],
         query=body["query"],
         analyzer_config=_parse_analyzer_config(body),
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index ba4009b2d..ecb01f695 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass, field
 from typing import List, Optional
 
-from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig
+from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig, Tokenization
 
 
 @dataclass
@@ -18,7 +18,7 @@ class TokenizeResult:
         stopword_config: The stopword configuration that was used, if any.
     """
 
-    tokenization: str
+    tokenization: Tokenization
     indexed: List[str]
     query: List[str]
     analyzer_config: Optional[TextAnalyzerConfig] = field(default=None)

From 91a359a38a56b7b812997b3e8280be6ae1d7b71e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 11:04:13 +0100
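Subject: [PATCH 5/8] refactor: tokenization models; drop per-field analyzer
 kwargs from text() and export TokenizeResult via weaviate.outputs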

---
 integration/test_tokenize.py      | 91 +++++++++++++++----------------
 weaviate/outputs/__init__.py      | 15 ++++-
 weaviate/outputs/tokenization.py  |  5 ++
 weaviate/tokenization/executor.py | 78 ++------------------------
 weaviate/tokenization/models.py   | 47 +++++++++++++---
 5 files changed, 108 insertions(+), 128 deletions(-)
 create mode 100644 weaviate/outputs/tokenization.py

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 24b515f2c..ddd67b656 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -1,9 +1,10 @@
-"""Integration tests for the tokenize module.
+"""Integration tests for the tokenization module.
 
 These tests cover the client's responsibilities:
-- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, kwargs)
+- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate)
 - Correct deserialization of responses into typed objects
 - Client-side validation (_TextAnalyzerConfigCreate rejects invalid input)
+- Version gate (>= 1.37.0)
 - Both sync and async client paths
 """
 
@@ -46,7 +47,7 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
 
 
 # ---------------------------------------------------------------------------
-# Serialization: enums, strings, kwargs, _TextAnalyzerConfigCreate
+# Serialization
 # ---------------------------------------------------------------------------
 
 
@@ -76,72 +77,68 @@ def test_tokenization_enum(
         assert result.indexed == expected_tokens
         assert result.query == expected_tokens
 
-    def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenization.text(text="hello world", tokenization="word")
+    def test_no_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
+        result = client.tokenization.text(text="hello world", tokenization=Tokenization.WORD)
         assert result.tokenization == Tokenization.WORD
         assert result.indexed == ["hello", "world"]
+        assert result.analyzer_config is None
 
-    def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
+    def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
         result = client.tokenization.text(
-            text="The quick brown fox",
+            text="L'école est fermée",
             tokenization=Tokenization.WORD,
-            stopword_preset=StopwordsPreset.EN,
+            analyzer_config=cfg,
         )
-        assert "the" not in result.query
-        assert "quick" in result.query
+        assert result.indexed == ["l", "ecole", "est", "fermee"]
 
-    def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
+    def test_ascii_fold_with_ignore(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é"])
         result = client.tokenization.text(
-            text="The quick brown fox",
+            text="L'école est fermée",
             tokenization=Tokenization.WORD,
-            stopword_preset="en",
+            analyzer_config=cfg,
         )
-        assert "the" not in result.query
+        assert result.indexed == ["l", "école", "est", "fermée"]
 
-    def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None:
+    def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN)
         result = client.tokenization.text(
-            text="L'école est fermée",
+            text="The quick brown fox",
             tokenization=Tokenization.WORD,
-            ascii_fold=True,
+            analyzer_config=cfg,
         )
-        assert result.indexed == ["l", "ecole", "est", "fermee"]
+        assert "the" not in result.query
+        assert "quick" in result.query
 
-    def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
+    def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
         result = client.tokenization.text(
-            text="L'école est fermée",
+            text="The quick brown fox",
             tokenization=Tokenization.WORD,
             analyzer_config=cfg,
         )
-        assert result.indexed == ["l", "ecole", "est", "fermee"]
+        assert "the" not in result.query
 
-    def test_analyzer_config_and_kwargs_produce_same_result(
-        self, client: weaviate.WeaviateClient
-    ) -> None:
-        """analyzer_config object and equivalent kwargs must produce identical output."""
+    def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClient) -> None:
         cfg = _TextAnalyzerConfigCreate(
             ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
         )
-        via_config = client.tokenization.text(
-            text="L'école est fermée",
+        result = client.tokenization.text(
+            text="The école est fermée",
             tokenization=Tokenization.WORD,
             analyzer_config=cfg,
         )
-        via_kwargs = client.tokenization.text(
-            text="L'école est fermée",
-            tokenization=Tokenization.WORD,
-            ascii_fold=True,
-            ascii_fold_ignore=["é"],
-            stopword_preset=StopwordsPreset.EN,
-        )
-        assert via_config.indexed == via_kwargs.indexed
-        assert via_config.query == via_kwargs.query
+        assert result.indexed == ["the", "école", "est", "fermée"]
+        assert "the" not in result.query
+        assert "école" in result.query
 
-    def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None:
+    def test_stopword_presets_custom_additions(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(stopword_preset="custom")
         result = client.tokenization.text(
             text="hello world test",
             tokenization=Tokenization.WORD,
-            stopword_preset="custom",
+            analyzer_config=cfg,
             stopword_presets={
                 "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None),
             },
@@ -150,10 +147,11 @@ def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -
         assert result.query == ["hello", "world"]
 
     def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(stopword_preset="en-no-the")
         result = client.tokenization.text(
             text="the quick",
             tokenization=Tokenization.WORD,
-            stopword_preset="en-no-the",
+            analyzer_config=cfg,
             stopword_presets={
                 "en-no-the": _StopwordsCreate(
                     preset=StopwordsPreset.EN, additions=None, removals=["the"]
@@ -165,7 +163,7 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate
 
 
 # ---------------------------------------------------------------------------
-# Deserialization: typed response fields
+# Deserialization
 # ---------------------------------------------------------------------------
 
 
@@ -179,12 +177,13 @@ def test_result_type(self, client: weaviate.WeaviateClient) -> None:
         assert isinstance(result.query, list)
 
     def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None:
+        cfg = _TextAnalyzerConfigCreate(
+            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
+        )
         result = client.tokenization.text(
             text="L'école",
             tokenization=Tokenization.WORD,
-            ascii_fold=True,
-            ascii_fold_ignore=["é"],
-            stopword_preset=StopwordsPreset.EN,
+            analyzer_config=cfg,
         )
         assert isinstance(result.analyzer_config, TextAnalyzerConfig)
         assert result.analyzer_config.ascii_fold is True
@@ -198,7 +197,6 @@ def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient)
     def test_stopword_config_deserialized_on_property(
         self, client: weaviate.WeaviateClient
     ) -> None:
-        """Property endpoint returns stopwordConfig; verify it deserializes to StopwordsConfig."""
         client.collections.delete("TestDeserStopword")
         try:
             client.collections.create_from_dict(
@@ -222,7 +220,6 @@ def test_stopword_config_deserialized_on_property(
             )
             assert isinstance(result, TokenizeResult)
             assert result.tokenization == Tokenization.WORD
-            # Stopword config should be deserialized when present
             if result.stopword_config is not None:
                 assert isinstance(result.stopword_config, StopwordsConfig)
         finally:
@@ -322,7 +319,7 @@ def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient
 
 
 class TestAsyncClient:
-    """Verify both text() and property() work through the async client."""
+    """Verify both text() and for_property() work through the async client."""
 
     @pytest.mark.asyncio
     async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py
index 62193fc35..ba3cf894f 100644
--- a/weaviate/outputs/__init__.py
+++ b/weaviate/outputs/__init__.py
@@ -1,4 +1,16 @@
-from . import aggregate, backup, batch, cluster, config, data, query, replication, tenants, users
+from . import (
+    aggregate,
+    backup,
+    batch,
+    cluster,
+    config,
+    data,
+    query,
+    replication,
+    tenants,
+    tokenization,
+    users,
+)
 
 __all__ = [
     "aggregate",
@@ -10,5 +22,6 @@
     "query",
     "replication",
     "tenants",
+    "tokenization",
     "users",
 ]
diff --git a/weaviate/outputs/tokenization.py b/weaviate/outputs/tokenization.py
new file mode 100644
index 000000000..0854f8b0d
--- /dev/null
+++ b/weaviate/outputs/tokenization.py
@@ -0,0 +1,5 @@
+from weaviate.tokenization.models import TokenizeResult
+
+__all__ = [
+    "TokenizeResult",
+]
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 6228d7350..226aeb6c6 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -1,13 +1,10 @@
 """Tokenize executor."""
 
-from typing import Any, Dict, Generic, List, Optional, Union
+from typing import Any, Dict, Generic, Optional
 
 from httpx import Response
 
 from weaviate.collections.classes.config import (
-    StopwordsConfig,
-    StopwordsPreset,
-    TextAnalyzerConfig,
     Tokenization,
     _StopwordsCreate,
     _TextAnalyzerConfigCreate,
@@ -18,40 +15,6 @@
 from weaviate.tokenization.models import TokenizeResult
 
 
-def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]:
-    ac = body.get("analyzerConfig")
-    if ac is None:
-        return None
-    if "asciiFold" not in ac and "stopwordPreset" not in ac:
-        return None
-    return TextAnalyzerConfig(
-        ascii_fold=ac.get("asciiFold", False),
-        ascii_fold_ignore=ac.get("asciiFoldIgnore"),
-        stopword_preset=ac.get("stopwordPreset"),
-    )
-
-
-def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]:
-    sc = body.get("stopwordConfig")
-    if sc is None:
-        return None
-    return StopwordsConfig(
-        preset=StopwordsPreset(sc["preset"]) if sc.get("preset") else StopwordsPreset.NONE,
-        additions=sc.get("additions"),
-        removals=sc.get("removals"),
-    )
-
-
-def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult:
-    return TokenizeResult(
-        tokenization=Tokenization(body["tokenization"]),
-        indexed=body["indexed"],
-        query=body["query"],
-        analyzer_config=_parse_analyzer_config(body),
-        stopword_config=_parse_stopword_config(body),
-    )
-
-
 class _TokenizationExecutor(Generic[ConnectionType]):
     def __init__(self, connection: ConnectionType):
         self._connection = connection
@@ -67,29 +30,17 @@ def _check_version(self) -> None:
     def text(
         self,
         text: str,
-        tokenization: Union[Tokenization, str],
+        tokenization: Tokenization,
         *,
         analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
-        ascii_fold: Optional[bool] = None,
-        ascii_fold_ignore: Optional[List[str]] = None,
-        stopword_preset: Optional[Union[StopwordsPreset, str]] = None,
         stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None,
     ) -> executor.Result[TokenizeResult]:
         """Tokenize text using the generic /v1/tokenize endpoint.
 
-        Analyzer settings can be provided either via a ``_TextAnalyzerConfigCreate``
-        object **or** via the individual keyword arguments (``ascii_fold``,
-        ``ascii_fold_ignore``, ``stopword_preset``).  If ``analyzer_config`` is
-        given the individual keyword arguments are ignored.
-
         Args:
             text: The text to tokenize.
             tokenization: The tokenization method to use (e.g. Tokenization.WORD).
-            analyzer_config: A ``_TextAnalyzerConfigCreate`` instance that bundles
-                ascii_fold, ascii_fold_ignore, and stopword_preset settings.
-            ascii_fold: Whether to fold accented characters to ASCII equivalents.
-            ascii_fold_ignore: Characters to exclude from ASCII folding.
-            stopword_preset: Stopword preset name to apply for query-time filtering.
+            analyzer_config: Text analyzer settings (ASCII folding, stopword preset).
             stopword_presets: Custom stopword preset definitions, keyed by name.
                 Each value is a ``_StopwordsCreate`` with optional preset, additions,
                 and removals fields.
@@ -101,33 +52,16 @@ def text(
             WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
         """
         self._check_version()
-        tokenization_str = (
-            tokenization.value if isinstance(tokenization, Tokenization) else tokenization
-        )
 
         payload: Dict[str, Any] = {
             "text": text,
-            "tokenization": tokenization_str,
+            "tokenization": tokenization.value,
         }
 
         if analyzer_config is not None:
             ac_dict = analyzer_config._to_dict()
             if ac_dict:
                 payload["analyzerConfig"] = ac_dict
-        else:
-            ac: Dict[str, Any] = {}
-            if ascii_fold is not None:
-                ac["asciiFold"] = ascii_fold
-            if ascii_fold_ignore is not None:
-                ac["asciiFoldIgnore"] = ascii_fold_ignore
-            if stopword_preset is not None:
-                ac["stopwordPreset"] = (
-                    stopword_preset.value
-                    if isinstance(stopword_preset, StopwordsPreset)
-                    else stopword_preset
-                )
-            if ac:
-                payload["analyzerConfig"] = ac
 
         if stopword_presets is not None:
             payload["stopwordPresets"] = {
@@ -135,7 +69,7 @@ def text(
             }
 
         def resp(response: Response) -> TokenizeResult:
-            return _parse_tokenize_result(response.json())
+            return TokenizeResult.model_validate(response.json())
 
         return executor.execute(
             response_callback=resp,
@@ -171,7 +105,7 @@ def for_property(
         payload: Dict[str, Any] = {"text": text}
 
         def resp(response: Response) -> TokenizeResult:
-            return _parse_tokenize_result(response.json())
+            return TokenizeResult.model_validate(response.json())
 
         return executor.execute(
             response_callback=resp,
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index ecb01f695..8bfa508f8 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -1,13 +1,18 @@
-"""Return types for tokenize operations."""
+"""Return types for tokenization operations."""
 
-from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
-from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig, Tokenization
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 
+from weaviate.collections.classes.config import (
+    StopwordsConfig,
+    StopwordsPreset,
+    TextAnalyzerConfig,
+    Tokenization,
+)
 
-@dataclass
-class TokenizeResult:
+
+class TokenizeResult(BaseModel):
     """Result of a tokenization operation.
 
     Attributes:
@@ -18,8 +23,34 @@ class TokenizeResult:
         stopword_config: The stopword configuration that was used, if any.
     """
 
+    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)
+
     tokenization: Tokenization
     indexed: List[str]
     query: List[str]
-    analyzer_config: Optional[TextAnalyzerConfig] = field(default=None)
-    stopword_config: Optional[StopwordsConfig] = field(default=None)
+    analyzer_config: Optional[TextAnalyzerConfig] = Field(default=None, alias="analyzerConfig")
+    stopword_config: Optional[StopwordsConfig] = Field(default=None, alias="stopwordConfig")
+
+    @field_validator("analyzer_config", mode="before")
+    @classmethod
+    def _parse_analyzer_config(cls, v: Optional[Dict[str, Any]]) -> Optional[TextAnalyzerConfig]:
+        if not isinstance(v, dict):  # None or an already-built config: nothing to parse
+            return v
+        if "asciiFold" not in v and "stopwordPreset" not in v:  # neither key: no config
+            return None
+        return TextAnalyzerConfig(
+            ascii_fold=v.get("asciiFold", False),
+            ascii_fold_ignore=v.get("asciiFoldIgnore"),
+            stopword_preset=v.get("stopwordPreset"),
+        )
+
+    @field_validator("stopword_config", mode="before")
+    @classmethod
+    def _parse_stopword_config(cls, v: Optional[Dict[str, Any]]) -> Optional[StopwordsConfig]:
+        if not isinstance(v, dict):  # None or an already-built config: nothing to parse
+            return v
+        # A missing/empty preset means NONE; v["preset"] alone would raise KeyError.
+        preset = StopwordsPreset(v["preset"]) if v.get("preset") else StopwordsPreset.NONE
+        return StopwordsConfig(
+            preset=preset, additions=v.get("additions"), removals=v.get("removals")
+        )

From 61665e712dac3d6e0665b9e5e8e7ae85d8e47144 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 11:08:31 +0100
Subject: [PATCH 6/8] refactor: move tokenize property to class config

---
 integration/test_tokenize.py            | 27 +++++++----------
 weaviate/collections/config/executor.py | 40 +++++++++++++++++++++++++
 weaviate/tokenization/executor.py       | 36 ----------------------
 3 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index ddd67b656..565cb197d 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -213,11 +213,8 @@ def test_stopword_config_deserialized_on_property(
                     ],
                 }
             )
-            result = client.tokenization.for_property(
-                collection_name="TestDeserStopword",
-                property_name="title",
-                text="the quick",
-            )
+            col = client.collections.get("TestDeserStopword")
+            result = col.config.tokenize_property(property_name="title", text="the quick")
             assert isinstance(result, TokenizeResult)
             assert result.tokenization == Tokenization.WORD
             if result.stopword_config is not None:
@@ -241,11 +238,8 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
                     ],
                 }
             )
-            result = client.tokenization.for_property(
-                collection_name="TestDeserPropTypes",
-                property_name="tag",
-                text="  Hello World  ",
-            )
+            col = client.collections.get("TestDeserPropTypes")
+            result = col.config.tokenize_property(property_name="tag", text="  Hello World  ")
             assert isinstance(result, TokenizeResult)
             assert result.tokenization == Tokenization.FIELD
             assert result.indexed == ["Hello World"]
@@ -304,13 +298,12 @@ def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> Non
         with pytest.raises(WeaviateUnsupportedFeatureError):
             client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
 
-    def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
+    def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
         if client._connection._weaviate_version.is_at_least(1, 37, 0):
             pytest.skip("Version gate only applies to Weaviate < 1.37.0")
+        col = client.collections.get("Any")
         with pytest.raises(WeaviateUnsupportedFeatureError):
-            client.tokenization.for_property(
-                collection_name="Any", property_name="title", text="hello"
-            )
+            col.config.tokenize_property(property_name="title", text="hello")
 
 
 # ---------------------------------------------------------------------------
@@ -319,7 +312,7 @@ def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient
 
 
 class TestAsyncClient:
-    """Verify both text() and for_property() work through the async client."""
+    """Verify text() and tokenize_property() work through the async client."""
 
     @pytest.mark.asyncio
     async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
@@ -362,8 +355,8 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
                     ],
                 }
             )
-            result = await async_client.tokenization.for_property(
-                collection_name="TestAsyncPropTokenize",
+            col = async_client.collections.get("TestAsyncPropTokenize")
+            result = await col.config.tokenize_property(
                 property_name="title",
                 text="The quick brown fox",
             )
diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py
index bd302c3c4..9e9390cda 100644
--- a/weaviate/collections/config/executor.py
+++ b/weaviate/collections/config/executor.py
@@ -56,6 +56,7 @@
     WeaviateInvalidInputError,
     WeaviateUnsupportedFeatureError,
 )
+from weaviate.tokenization.models import TokenizeResult
 from weaviate.util import (
     _capitalize_first_letter,
     _decode_json_response_dict,
@@ -666,3 +667,42 @@ def resp(res: Response) -> bool:
             error_msg="Property may not exist",
             status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"),
         )
+
+    def tokenize_property(
+        self,
+        property_name: str,
+        text: str,
+    ) -> executor.Result[TokenizeResult]:
+        """Tokenize ``text`` the way the given property of this collection would.
+
+        Args:
+            property_name: Name of the property whose tokenization settings are applied.
+            text: The input string to tokenize.
+
+        Returns:
+            The ``TokenizeResult`` validated from the server's JSON response.
+
+        Raises:
+            WeaviateUnsupportedFeatureError: If the connected server is older than 1.37.0.
+        """
+        # Refuse early on servers older than 1.37.0 instead of sending the request.
+        server_version = self._connection._weaviate_version
+        if server_version.is_lower_than(1, 37, 0):
+            raise WeaviateUnsupportedFeatureError(
+                "Tokenization",
+                str(server_version),
+                "1.37.0",
+            )
+
+        def parse(res: Response) -> TokenizeResult:
+            # The response body is validated straight into the pydantic result model.
+            return TokenizeResult.model_validate(res.json())
+
+        return executor.execute(
+            response_callback=parse,
+            method=self._connection.post,
+            path=f"/schema/{self._name}/properties/{property_name}/tokenize",
+            weaviate_object={"text": text},
+            error_msg="Property tokenization failed",
+            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
+        )
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 226aeb6c6..5093c14e9 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -79,39 +79,3 @@ def resp(response: Response) -> TokenizeResult:
             error_msg="Tokenization failed",
             status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
         )
-
-    def for_property(
-        self,
-        collection_name: str,
-        property_name: str,
-        text: str,
-    ) -> executor.Result[TokenizeResult]:
-        """Tokenize text using a property's configured tokenization settings.
-
-        Args:
-            collection_name: The collection (class) name.
-            property_name: The property name whose tokenization config to use.
-            text: The text to tokenize.
-
-        Returns:
-            A TokenizeResult with indexed and query token lists.
-
-        Raises:
-            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
-        """
-        self._check_version()
-        path = f"/schema/{collection_name}/properties/{property_name}/tokenize"
-
-        payload: Dict[str, Any] = {"text": text}
-
-        def resp(response: Response) -> TokenizeResult:
-            return TokenizeResult.model_validate(response.json())
-
-        return executor.execute(
-            response_callback=resp,
-            method=self._connection.post,
-            path=path,
-            weaviate_object=payload,
-            error_msg="Property tokenization failed",
-            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
-        )

From aea03278f3ee5712608589138637d77131364955 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 11:16:24 +0100
Subject: [PATCH 7/8] fix: remove trailing whitespace in __init__.py

---
 weaviate/outputs/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py
index 5381d6dc4..75cb031e0 100644
--- a/weaviate/outputs/__init__.py
+++ b/weaviate/outputs/__init__.py
@@ -9,7 +9,7 @@
     query,
     replication,
     tenants,
-    tokenization,  
+    tokenization,
     users,
 )
 

From ef55ce283b1aae518cb8eacc6dba7fdf6530b709 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Wed, 15 Apr 2026 11:55:39 +0100
Subject: [PATCH 8/8] test: add version gate for Weaviate >= 1.37.0 in
 tokenization tests

---
 integration/test_tokenize.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 565cb197d..97587235b 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -36,6 +36,12 @@ def client() -> Generator[weaviate.WeaviateClient, None, None]:
     c.close()
 
 
+@pytest.fixture(autouse=False)  # opt-in via @pytest.mark.usefixtures("require_1_37")
+def require_1_37(client: weaviate.WeaviateClient) -> None:
+    if client._connection._weaviate_version.is_lower_than(1, 37, 0):  # server too old
+        pytest.skip("Tokenization requires Weaviate >= 1.37.0")
+
+
 @pytest_asyncio.fixture
 async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
     c = weaviate.use_async_with_local(
@@ -51,6 +57,7 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
 # ---------------------------------------------------------------------------
 
 
+@pytest.mark.usefixtures("require_1_37")
 class TestSerialization:
     """Verify the client correctly serializes different input forms."""
 
@@ -167,6 +174,7 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate
 # ---------------------------------------------------------------------------
 
 
+@pytest.mark.usefixtures("require_1_37")
 class TestDeserialization:
     """Verify the client correctly deserializes response fields into typed objects."""
 
@@ -311,6 +319,7 @@ def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateC
 # ---------------------------------------------------------------------------
 
 
+@pytest.mark.usefixtures("require_1_37")
 class TestAsyncClient:
     """Verify text() and tokenize_property() work through the async client."""