"""Integration tests for the tokenization module.

These tests cover the client's responsibilities:
- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate)
- Correct deserialization of responses into typed objects
- Client-side validation (_TextAnalyzerConfigCreate rejects invalid input)
- Version gate (>= 1.37.0)
- Both sync and async client paths
"""

from typing import AsyncGenerator, Generator, List

import pytest
import pytest_asyncio

import weaviate
from weaviate.collections.classes.config import (
    StopwordsConfig,
    StopwordsPreset,
    TextAnalyzerConfig,
    Tokenization,
    _StopwordsCreate,
    _TextAnalyzerConfigCreate,
)
from weaviate.config import AdditionalConfig
from weaviate.exceptions import WeaviateUnsupportedFeatureError
from weaviate.tokenization.models import TokenizeResult


@pytest.fixture(scope="module")
def client() -> Generator[weaviate.WeaviateClient, None, None]:
    """Module-scoped sync client connected to the local Weaviate instance."""
    c = weaviate.connect_to_local(
        additional_config=AdditionalConfig(timeout=(60, 120)),
    )
    yield c
    c.close()


@pytest.fixture
def require_1_37(client: weaviate.WeaviateClient) -> None:
    """Skip the requesting test when the server predates tokenization support (< 1.37.0)."""
    if client._connection._weaviate_version.is_lower_than(1, 37, 0):
        pytest.skip("Tokenization requires Weaviate >= 1.37.0")


@pytest_asyncio.fixture
async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
    """Function-scoped async client connected to the local Weaviate instance."""
    c = weaviate.use_async_with_local(
        additional_config=AdditionalConfig(timeout=(60, 120)),
    )
    await c.connect()
    yield c
    await c.close()


# ---------------------------------------------------------------------------
# Serialization
# ---------------------------------------------------------------------------


@pytest.mark.usefixtures("require_1_37")
class TestSerialization:
    """Verify the client correctly serializes different input forms."""

    @pytest.mark.parametrize(
        "tokenization,text,expected_tokens",
        [
            (Tokenization.WORD, "The quick brown fox", ["the", "quick", "brown", "fox"]),
            (Tokenization.LOWERCASE, "Hello World Test", ["hello", "world", "test"]),
            (Tokenization.WHITESPACE, "Hello World Test", ["Hello", "World", "Test"]),
            (Tokenization.FIELD, "  Hello World  ", ["Hello World"]),
            (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"]),
        ],
    )
    def test_tokenization_enum(
        self,
        client: weaviate.WeaviateClient,
        tokenization: Tokenization,
        text: str,
        expected_tokens: List[str],
    ) -> None:
        """Each Tokenization enum member round-trips through the endpoint."""
        result = client.tokenization.text(text=text, tokenization=tokenization)
        assert isinstance(result, TokenizeResult)
        assert result.tokenization == tokenization
        assert result.indexed == expected_tokens
        assert result.query == expected_tokens

    def test_no_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
        result = client.tokenization.text(text="hello world", tokenization=Tokenization.WORD)
        assert result.tokenization == Tokenization.WORD
        assert result.indexed == ["hello", "world"]
        assert result.analyzer_config is None

    def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
        result = client.tokenization.text(
            text="L'école est fermée",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert result.indexed == ["l", "ecole", "est", "fermee"]

    def test_ascii_fold_with_ignore(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é"])
        result = client.tokenization.text(
            text="L'école est fermée",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert result.indexed == ["l", "école", "est", "fermée"]

    def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN)
        result = client.tokenization.text(
            text="The quick brown fox",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert "the" not in result.query
        assert "quick" in result.query

    def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
        result = client.tokenization.text(
            text="The quick brown fox",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert "the" not in result.query

    def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(
            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
        )
        result = client.tokenization.text(
            text="The école est fermée",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        # Stopwords are removed from the query tokens only; indexed keeps them.
        assert result.indexed == ["the", "école", "est", "fermée"]
        assert "the" not in result.query
        assert "école" in result.query

    def test_stopword_presets_custom_additions(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(stopword_preset="custom")
        result = client.tokenization.text(
            text="hello world test",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
            stopword_presets={
                "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None),
            },
        )
        assert result.indexed == ["hello", "world", "test"]
        assert result.query == ["hello", "world"]

    def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(stopword_preset="en-no-the")
        result = client.tokenization.text(
            text="the quick",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
            stopword_presets={
                "en-no-the": _StopwordsCreate(
                    preset=StopwordsPreset.EN, additions=None, removals=["the"]
                ),
            },
        )
        assert result.indexed == ["the", "quick"]
        assert result.query == ["the", "quick"]
# ---------------------------------------------------------------------------
# Deserialization
# ---------------------------------------------------------------------------


@pytest.mark.usefixtures("require_1_37")
class TestDeserialization:
    """Verify the client correctly deserializes response fields into typed objects."""

    def test_result_type(self, client: weaviate.WeaviateClient) -> None:
        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
        assert isinstance(result, TokenizeResult)
        assert isinstance(result.indexed, list)
        assert isinstance(result.query, list)

    def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None:
        cfg = _TextAnalyzerConfigCreate(
            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
        )
        result = client.tokenization.text(
            text="L'école",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
        assert result.analyzer_config.ascii_fold is True
        assert result.analyzer_config.ascii_fold_ignore == ["é"]
        assert result.analyzer_config.stopword_preset == "en"

    def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None:
        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
        assert result.analyzer_config is None

    def test_stopword_config_deserialized_on_property(
        self, client: weaviate.WeaviateClient
    ) -> None:
        client.collections.delete("TestDeserStopword")
        try:
            client.collections.create_from_dict(
                {
                    "class": "TestDeserStopword",
                    "vectorizer": "none",
                    "properties": [
                        {
                            "name": "title",
                            "dataType": ["text"],
                            "tokenization": "word",
                            "textAnalyzer": {"stopwordPreset": "en"},
                        },
                    ],
                }
            )
            col = client.collections.get("TestDeserStopword")
            result = col.config.tokenize_property(property_name="title", text="the quick")
            assert isinstance(result, TokenizeResult)
            assert result.tokenization == Tokenization.WORD
            # The property is configured with the "en" stopword preset, so the
            # stopword "the" must survive in `indexed` but be dropped from `query`
            # (same contract asserted in TestSerialization for the text endpoint).
            assert result.indexed == ["the", "quick"]
            assert "the" not in result.query
            # The server may or may not echo the stopword config; when present it
            # must be deserialized into the typed StopwordsConfig object.
            if result.stopword_config is not None:
                assert isinstance(result.stopword_config, StopwordsConfig)
        finally:
            client.collections.delete("TestDeserStopword")

    def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
        client.collections.delete("TestDeserPropTypes")
        try:
            client.collections.create_from_dict(
                {
                    "class": "TestDeserPropTypes",
                    "vectorizer": "none",
                    "properties": [
                        {
                            "name": "tag",
                            "dataType": ["text"],
                            "tokenization": "field",
                        },
                    ],
                }
            )
            col = client.collections.get("TestDeserPropTypes")
            result = col.config.tokenize_property(property_name="tag", text="  Hello World  ")
            assert isinstance(result, TokenizeResult)
            assert result.tokenization == Tokenization.FIELD
            assert result.indexed == ["Hello World"]
        finally:
            client.collections.delete("TestDeserPropTypes")


# ---------------------------------------------------------------------------
# Client-side validation (_TextAnalyzerConfigCreate)
# ---------------------------------------------------------------------------


class TestClientSideValidation:
    """Verify that _TextAnalyzerConfigCreate rejects invalid input before hitting the server."""

    def test_ascii_fold_ignore_without_fold_raises(self) -> None:
        with pytest.raises(ValueError, match="asciiFoldIgnore"):
            _TextAnalyzerConfigCreate(ascii_fold=False, ascii_fold_ignore=["é"])

    def test_ascii_fold_ignore_without_fold_default_raises(self) -> None:
        with pytest.raises(ValueError, match="asciiFoldIgnore"):
            _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"])

    def test_valid_config_does_not_raise(self) -> None:
        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é", "ñ"])
        assert cfg.asciiFold is True
        assert cfg.asciiFoldIgnore == ["é", "ñ"]

    def test_fold_without_ignore_is_valid(self) -> None:
        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
        assert cfg.asciiFold is True
        assert cfg.asciiFoldIgnore is None

    def test_stopword_preset_only_is_valid(self) -> None:
        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
        assert cfg.stopwordPreset == "en"

    def test_empty_config_is_valid(self) -> None:
        cfg = _TextAnalyzerConfigCreate()
        assert cfg.asciiFold is None
        assert cfg.asciiFoldIgnore is None
        assert cfg.stopwordPreset is None


# ---------------------------------------------------------------------------
# Version gate
# ---------------------------------------------------------------------------


class TestVersionGate:
    """On Weaviate < 1.37 the client must raise before sending the request."""

    def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
        if client._connection._weaviate_version.is_at_least(1, 37, 0):
            pytest.skip("Version gate only applies to Weaviate < 1.37.0")
        with pytest.raises(WeaviateUnsupportedFeatureError):
            client.tokenization.text(text="hello", tokenization=Tokenization.WORD)

    def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
        if client._connection._weaviate_version.is_at_least(1, 37, 0):
            pytest.skip("Version gate only applies to Weaviate < 1.37.0")
        col = client.collections.get("Any")
        with pytest.raises(WeaviateUnsupportedFeatureError):
            col.config.tokenize_property(property_name="title", text="hello")


# ---------------------------------------------------------------------------
# Async client
# ---------------------------------------------------------------------------


@pytest.mark.usefixtures("require_1_37")
class TestAsyncClient:
    """Verify text() and tokenize_property() work through the async client."""

    @pytest.mark.asyncio
    async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
        result = await async_client.tokenization.text(
            text="The quick brown fox",
            tokenization=Tokenization.WORD,
        )
        assert isinstance(result, TokenizeResult)
        assert result.indexed == ["the", "quick", "brown", "fox"]

    @pytest.mark.asyncio
    async def test_text_with_analyzer_config(
        self, async_client: weaviate.WeaviateAsyncClient
    ) -> None:
        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN)
        result = await async_client.tokenization.text(
            text="L'école est fermée",
            tokenization=Tokenization.WORD,
            analyzer_config=cfg,
        )
        assert result.indexed == ["l", "ecole", "est", "fermee"]
        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
        assert result.analyzer_config.ascii_fold is True

    @pytest.mark.asyncio
    async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
        await async_client.collections.delete("TestAsyncPropTokenize")
        try:
            await async_client.collections.create_from_dict(
                {
                    "class": "TestAsyncPropTokenize",
                    "vectorizer": "none",
                    "properties": [
                        {
                            "name": "title",
                            "dataType": ["text"],
                            "tokenization": "word",
                            "textAnalyzer": {"stopwordPreset": "en"},
                        },
                    ],
                }
            )
            col = async_client.collections.get("TestAsyncPropTokenize")
            result = await col.config.tokenize_property(
                property_name="title",
                text="The quick brown fox",
            )
            assert isinstance(result, TokenizeResult)
            assert result.tokenization == Tokenization.WORD
            assert result.indexed == ["the", "quick", "brown", "fox"]
            assert "the" not in result.query
            assert "quick" in result.query
        finally:
            await async_client.collections.delete("TestAsyncPropTokenize")


# NOTE(review): the original patch additionally wires the new module into the
# package: weaviate/__init__.py imports `tokenization` and adds it to __all__;
# weaviate/client.py(.pyi) attach `tokenization` executors to both clients.
# That wiring lives outside this test module.
a/weaviate/client.py +++ b/weaviate/client.py @@ -23,6 +23,7 @@ from .export import _Export, _ExportAsync from .groups import _Groups, _GroupsAsync from .rbac import _Roles, _RolesAsync +from .tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER from .users import _Users, _UsersAsync @@ -84,6 +85,7 @@ def __init__( self.debug = _DebugAsync(self._connection) self.groups = _GroupsAsync(self._connection) self.roles = _RolesAsync(self._connection) + self.tokenization = _TokenizationAsync(self._connection) self.users = _UsersAsync(self._connection) async def __aenter__(self) -> "WeaviateAsyncClient": @@ -160,6 +162,7 @@ def __init__( self.debug = _Debug(self._connection) self.groups = _Groups(self._connection) self.roles = _Roles(self._connection) + self.tokenization = _Tokenization(self._connection) self.users = _Users(self._connection) def __enter__(self) -> "WeaviateClient": diff --git a/weaviate/client.pyi b/weaviate/client.pyi index 0ac79415c..d7b99eba6 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -22,6 +22,7 @@ from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAs from .debug import _Debug, _DebugAsync from .export import _Export, _ExportAsync from .rbac import _Roles, _RolesAsync +from .tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER] @@ -37,6 +38,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]): debug: _DebugAsync groups: _GroupsAsync roles: _RolesAsync + tokenization: _TokenizationAsync users: _UsersAsync async def close(self) -> None: ... @@ -61,6 +63,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]): debug: _Debug groups: _Groups roles: _Roles + tokenization: _Tokenization users: _Users def close(self) -> None: ... 
diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index 103ab70ac..fe9f5ec0d 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -56,6 +56,7 @@ WeaviateInvalidInputError, WeaviateUnsupportedFeatureError, ) +from weaviate.tokenization.models import TokenizeResult from weaviate.util import ( _capitalize_first_letter, _decode_json_response_dict, @@ -666,3 +667,42 @@ def resp(res: Response) -> bool: error_msg="Property may not exist", status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"), ) + + def tokenize_property( + self, + property_name: str, + text: str, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using a property's configured tokenization settings. + + Args: + property_name: The property name whose tokenization config to use. + text: The text to tokenize. + + Returns: + A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
+ """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Tokenization", + str(self._connection._weaviate_version), + "1.37.0", + ) + + path = f"/schema/{self._name}/properties/{property_name}/tokenize" + payload: Dict[str, Any] = {"text": text} + + def resp(response: Response) -> TokenizeResult: + return TokenizeResult.model_validate(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path=path, + weaviate_object=payload, + error_msg="Property tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), + ) diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py index cd6176d93..75cb031e0 100644 --- a/weaviate/outputs/__init__.py +++ b/weaviate/outputs/__init__.py @@ -9,6 +9,7 @@ query, replication, tenants, + tokenization, users, ) @@ -23,5 +24,6 @@ "query", "replication", "tenants", + "tokenization", "users", ] diff --git a/weaviate/outputs/tokenization.py b/weaviate/outputs/tokenization.py new file mode 100644 index 000000000..0854f8b0d --- /dev/null +++ b/weaviate/outputs/tokenization.py @@ -0,0 +1,5 @@ +from weaviate.tokenization.models import TokenizeResult + +__all__ = [ + "TokenizeResult", +] diff --git a/weaviate/tokenization/__init__.py b/weaviate/tokenization/__init__.py new file mode 100644 index 000000000..2437f7745 --- /dev/null +++ b/weaviate/tokenization/__init__.py @@ -0,0 +1,7 @@ +"""Module for tokenization operations.""" + +from .async_ import _TokenizationAsync +from .sync import _Tokenization +from .models import TokenizeResult + +__all__ = ["_Tokenization", "_TokenizationAsync", "TokenizeResult"] diff --git a/weaviate/tokenization/async_.py b/weaviate/tokenization/async_.py new file mode 100644 index 000000000..5406a39dd --- /dev/null +++ b/weaviate/tokenization/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync 
# --- weaviate/tokenization/async_.py ---
from weaviate.connect import executor
from weaviate.connect.v4 import ConnectionAsync
from weaviate.tokenization.executor import _TokenizationExecutor


@executor.wrap("async")
class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
    """Async client surface for the tokenization endpoint."""


# --- weaviate/tokenization/executor.py ---
"""Tokenize executor."""

from typing import Any, Dict, Generic, Optional

from httpx import Response

from weaviate.collections.classes.config import (
    Tokenization,
    _StopwordsCreate,
    _TextAnalyzerConfigCreate,
)
from weaviate.connect import executor
from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
from weaviate.exceptions import WeaviateUnsupportedFeatureError
from weaviate.tokenization.models import TokenizeResult


class _TokenizationExecutor(Generic[ConnectionType]):
    """Shared sync/async implementation behind the /v1/tokenize endpoint."""

    def __init__(self, connection: ConnectionType):
        self._connection = connection

    def _check_version(self) -> None:
        """Raise if the connected server predates tokenization support (< 1.37.0)."""
        if self._connection._weaviate_version.is_lower_than(1, 37, 0):
            raise WeaviateUnsupportedFeatureError(
                "Tokenization",
                str(self._connection._weaviate_version),
                "1.37.0",
            )

    def text(
        self,
        text: str,
        tokenization: Tokenization,
        *,
        analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
        stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None,
    ) -> executor.Result[TokenizeResult]:
        """Tokenize text using the generic /v1/tokenize endpoint.

        Args:
            text: The text to tokenize.
            tokenization: The tokenization method to use (e.g. Tokenization.WORD).
            analyzer_config: Text analyzer settings (ASCII folding, stopword preset).
            stopword_presets: Custom stopword preset definitions, keyed by name.
                Each value is a ``_StopwordsCreate`` with optional preset, additions,
                and removals fields.

        Returns:
            A TokenizeResult with indexed and query token lists.

        Raises:
            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
        """
        self._check_version()

        payload: Dict[str, Any] = {
            "text": text,
            "tokenization": tokenization.value,
        }

        if analyzer_config is not None:
            ac_dict = analyzer_config._to_dict()
            # Only send the key when the config serializes to something non-empty.
            if ac_dict:
                payload["analyzerConfig"] = ac_dict

        if stopword_presets is not None:
            payload["stopwordPresets"] = {
                name: cfg._to_dict() for name, cfg in stopword_presets.items()
            }

        def resp(response: Response) -> TokenizeResult:
            return TokenizeResult.model_validate(response.json())

        return executor.execute(
            response_callback=resp,
            method=self._connection.post,
            path="/tokenize",
            weaviate_object=payload,
            error_msg="Tokenization failed",
            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
        )


# --- weaviate/tokenization/models.py ---
"""Return types for tokenization operations."""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator

from weaviate.collections.classes.config import (
    StopwordsConfig,
    StopwordsPreset,
    TextAnalyzerConfig,
    Tokenization,
)


class TokenizeResult(BaseModel):
    """Result of a tokenization operation.

    Attributes:
        tokenization: The tokenization method that was applied.
        indexed: Tokens as they would be stored in the inverted index.
        query: Tokens as they would be used for querying (after stopword removal).
        analyzer_config: The text analyzer configuration that was used, if any.
        stopword_config: The stopword configuration that was used, if any.
    """

    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)

    tokenization: Tokenization
    indexed: List[str]
    query: List[str]
    analyzer_config: Optional[TextAnalyzerConfig] = Field(default=None, alias="analyzerConfig")
    stopword_config: Optional[StopwordsConfig] = Field(default=None, alias="stopwordConfig")

    @field_validator("analyzer_config", mode="before")
    @classmethod
    def _parse_analyzer_config(cls, v: Optional[Dict[str, Any]]) -> Optional[TextAnalyzerConfig]:
        if v is None:
            return None
        # NOTE(review): a response carrying only "asciiFoldIgnore" (without
        # "asciiFold"/"stopwordPreset") is treated as "no config" here — confirm
        # the server never emits that shape before widening this check.
        if "asciiFold" not in v and "stopwordPreset" not in v:
            return None
        return TextAnalyzerConfig(
            ascii_fold=v.get("asciiFold", False),
            ascii_fold_ignore=v.get("asciiFoldIgnore"),
            stopword_preset=v.get("stopwordPreset"),
        )

    @field_validator("stopword_config", mode="before")
    @classmethod
    def _parse_stopword_config(cls, v: Optional[Dict[str, Any]]) -> Optional[StopwordsConfig]:
        if v is None:
            return None
        preset = v.get("preset")
        if preset is None:
            # A stopword config without a preset is not representable as a
            # StopwordsConfig; treat it as absent rather than raising KeyError
            # inside pydantic validation (previous behavior: v["preset"]).
            return None
        return StopwordsConfig(
            preset=StopwordsPreset(preset),
            additions=v.get("additions"),
            removals=v.get("removals"),
        )


# --- weaviate/tokenization/sync.py ---
from weaviate.connect import executor
from weaviate.connect.v4 import ConnectionSync
from weaviate.tokenization.executor import _TokenizationExecutor


@executor.wrap("sync")
class _Tokenization(_TokenizationExecutor[ConnectionSync]):
    """Sync client surface for the tokenization endpoint."""