Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 111 additions & 1 deletion .github/workflows/releases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,119 @@ jobs:
ls
ls dist

# ---------------------------------------------------------------------------
# Pre-publish gate: confirm zarr-metadata's required floor is on PyPI.
#
# zarr-python and zarr-metadata co-develop in this monorepo. During local
# development zarr-metadata is resolved from packages/zarr-metadata/ via the
# uv workspace (see [tool.uv.sources] in pyproject.toml). The wheel we are
# about to publish, however, only carries a version-range requirement
# (e.g. `zarr-metadata>=0.1.1,<0.2`); end users will resolve that against
# PyPI.
#
# The failure mode this job catches: a zarr-python PR added code that
# depends on a zarr-metadata feature that has been merged into
# packages/zarr-metadata/ but not yet released to PyPI. CI passed because
# the workspace override resolved to the in-tree copy, but a user installing
# the resulting zarr-python wheel would get a published zarr-metadata that
# lacks the feature, and zarr-python would fail at import or first use.
#
# The mitigation here is a presence check on PyPI: extract the floor of
# zarr-python's zarr-metadata requirement from the wheel's METADATA file,
# and refuse to upload if that exact version is not yet on PyPI. This is
# analogous to what `cargo publish` does automatically against crates.io,
# but expressed as a CI step because twine has no built-in equivalent.
#
# When you bump zarr-metadata to a new version that zarr-python depends on,
# the required release order is:
#   1. release zarr-metadata to PyPI;
#   2. bump the floor in zarr-python's [project.dependencies];
#   3. release zarr-python.
# This job will fail at step 3 if step 1 was skipped.
# ---------------------------------------------------------------------------
verify_pypi_dependency:
  name: Verify zarr-metadata floor is on PyPI
  needs: [build_artifacts]
  runs-on: ubuntu-latest
  # Run only on actual releases. Pull-request and push-to-main runs go
  # through CI without this gate, since their wheels are never uploaded.
  if: github.event_name == 'release'
  steps:
    - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        name: releases
        path: dist

    - name: Check zarr-metadata floor is published on PyPI
      run: |
        # The wheel's METADATA file lives at zarr-*.dist-info/METADATA inside
        # the wheel. `unzip -p` writes a file's contents to stdout without
        # extracting; the glob matches whichever dist-info dir is inside.
        metadata="$(unzip -p dist/zarr-*.whl '*.dist-info/METADATA')"

        # Pick the Requires-Dist line for zarr-metadata. The wheel may have
        # several Requires-Dist lines for different extras; we want the one
        # that applies unconditionally (no `; extra == "..."` marker).
        # Match `Requires-Dist: zarr-metadata` followed by anything that
        # ends a project name in PEP 508: a version operator (<, >, =, !,
        # ~), whitespace, `[` (extras), `;` (markers), `(` (legacy
        # parenthesized version), or end-of-line. The character class
        # excludes every character PEP 508 allows in a name (letters,
        # digits, `_`, `.`, `-`), so a hypothetical `zarr-metadata-ext`
        # or `zarr-metadata.ext` dep would not match.
        req_line="$(printf '%s' "$metadata" \
          | grep -E '^Requires-Dist: zarr-metadata([^A-Za-z0-9_.-]|$)' \
          | grep -v 'extra ==' \
          || true)"

        if [ -z "$req_line" ]; then
          echo "::error::Could not find an unconditional Requires-Dist line for zarr-metadata in the built wheel."
          echo "Wheel METADATA Requires-Dist lines:"
          printf '%s' "$metadata" | grep '^Requires-Dist:' || true
          exit 1
        fi
        echo "Requires-Dist line: $req_line"

        # Extract the floor: the version after `>=`. Version specifiers in
        # PEP 440 are comma-separated (e.g. `>=0.1.1, <0.2`), and a version
        # number can never contain a comma, semicolon, or whitespace, so
        # capture up to the first of those. Stopping at `;` also keeps a
        # trailing environment marker (`;python_version>=...`) out of the
        # captured value. The sed then strips the leading operator.
        floor="$(printf '%s' "$req_line" \
          | grep -oE '>=[[:space:]]*[^,;[:space:]]+' \
          | sed 's/^>=[[:space:]]*//' \
          | head -1)"

        if [ -z "$floor" ]; then
          # Single quotes around >= below, NOT backticks: inside a
          # double-quoted string, backticks would run command substitution.
          echo "::error::Could not extract a '>=' floor from: $req_line"
          echo "zarr-python's zarr-metadata requirement must include a '>=' bound so this gate has something to check."
          exit 1
        fi
        echo "zarr-metadata floor: $floor"

        # PyPI's JSON API returns 200 if the named version exists and 404
        # if it doesn't. -s silences progress output; -o /dev/null discards
        # the body; -w '%{http_code}' prints just the status. Any non-200
        # response means the floor has not been published yet.
        status="$(curl -s -o /dev/null -w '%{http_code}' \
          "https://pypi.org/pypi/zarr-metadata/${floor}/json")"

        if [ "$status" != "200" ]; then
          echo "::error::zarr-metadata ${floor} is not available on PyPI (HTTP ${status})."
          echo ""
          echo "The wheel about to be uploaded declares it requires zarr-metadata ${floor} or later,"
          echo "but no such release exists on PyPI. Publish zarr-metadata ${floor} first, then"
          echo "re-run this release workflow."
          exit 1
        fi
        echo "OK: zarr-metadata ${floor} is on PyPI; safe to upload zarr-python."

upload_pypi:
name: Upload to PyPI
needs: [build_artifacts, test_dist_pypi]
# Depend on the new gate so the upload step does not run if the floor
# is missing from PyPI. The gate runs only on releases (see its `if:`
# condition); on PR / push runs it is skipped, and skipped jobs in a
# `needs:` list are treated as satisfied by GitHub Actions.
needs: [build_artifacts, test_dist_pypi, verify_pypi_dependency]
runs-on: ubuntu-latest
if: github.event_name == 'release'
environment:
Expand Down
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ repos:
- typing_extensions
- universal-pathlib
- obstore>=0.5.1
- zarr-metadata>=0.1.1
# Tests
- pytest
- hypothesis
Expand All @@ -63,6 +64,14 @@ repos:
entry: "\\.(lstrip|rstrip)\\([\"'][^\"']{2,}[\"']\\)"
types: [python]
files: ^(src|tests)/
- id: check-min-deps-floor
name: check min_deps zarr-metadata pin matches the project floor
language: system
entry: python ci/check_min_deps_floor.py
# Run whenever pyproject.toml changes; pass_filenames is False because
# the script reads the file directly rather than processing argv.
pass_filenames: false
files: ^pyproject\.toml$
- repo: https://github.com/zizmorcore/zizmor-pre-commit
rev: v1.24.1
hooks:
Expand Down
3 changes: 3 additions & 0 deletions changes/3961.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``zarr-python`` now depends on the [``zarr-metadata``](https://pypi.org/project/zarr-metadata/) package, which provides spec-defined TypedDicts and literal types for Zarr v2 and v3 metadata documents. Several internal types previously defined in ``zarr-python`` are now aliases that re-export their canonical definitions from ``zarr-metadata``: ``zarr.codecs.blosc.BloscShuffleLiteral``, ``zarr.codecs.blosc.BloscCnameLiteral``, ``zarr.codecs.blosc.BloscConfigV3``, ``zarr.codecs.blosc.BloscJSON_V3``, ``zarr.codecs.cast_value.RoundingMode``, ``zarr.codecs.cast_value.OutOfRangeMode``, ``zarr.core.metadata.v2.ArrayV2MetadataDict``, ``zarr.core.metadata.v3.AllowedExtraField``, and ``zarr.core.metadata.v3.ArrayMetadataJSON_V3``.

The version requirement (``zarr-metadata>=0.1.1,<0.2``) caps the major version so a future breaking change in ``zarr-metadata`` cannot silently break installed ``zarr-python``. During local development, ``zarr-metadata`` is resolved from the in-tree copy under ``packages/zarr-metadata/`` via a uv workspace; see [the contributing guide](https://zarr.readthedocs.io/en/stable/contributing.html) for details.
111 changes: 111 additions & 0 deletions ci/check_min_deps_floor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""
Enforce the invariant: `min_deps` pins zarr-metadata to the floor of
zarr-python's declared zarr-metadata range.

zarr-python declares `zarr-metadata>=X.Y.Z,<...>` in `[project.dependencies]`.
The `min_deps` hatch env tests against the *minimum* supported deps, so it
must pin zarr-metadata to exactly that floor (e.g. `zarr-metadata==X.Y.Z`).
Without this script the two declarations can drift silently — the project's
floor could rise without `min_deps` noticing, and `min_deps` would no longer
verify what its name claims.

Run:
python ci/check_min_deps_floor.py

Exits 0 if floors agree; non-zero with a clear message if not.
"""

from __future__ import annotations

import re
import sys
import tomllib
from pathlib import Path

ROOT = Path(__file__).parent.parent.resolve()
PYPROJECT = ROOT / "pyproject.toml"

# Match `>=X.Y.Z` (with or without surrounding whitespace) inside a PEP 440
# version specifier set. Captures just the version number.
_FLOOR_RE = re.compile(r">=\s*([^,\s]+)")
# Match `==X.Y.Z` likewise. Captures the version number.
_PIN_RE = re.compile(r"==\s*([^,\s]+)")


def find_zarr_metadata_floor(deps: list[str]) -> str:
    """Return the ``>=`` floor of the zarr-metadata requirement in `deps`.

    Parameters
    ----------
    deps:
        PEP 508 requirement strings, e.g. the value of
        ``[project.dependencies]``.

    Returns
    -------
    str
        The version after the first ``>=`` in zarr-metadata's specifier
        set, e.g. ``"0.1.1"`` for ``"zarr-metadata>=0.1.1,<0.2"``.

    Raises
    ------
    SystemExit
        If zarr-metadata is not listed in `deps`, or its specifier set
        has no ``>=`` bound.
    """
    for dep in deps:
        # The project name is everything before the first character that
        # cannot appear in a PEP 508 name: a version operator, whitespace,
        # `[` (extras), `;` (markers), or `(` (legacy parenthesized spec).
        name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip()
        # Compare canonicalized names (PEP 503): lowercase, with runs of
        # `-`, `_`, `.` collapsed to `-`, so `Zarr_Metadata` and
        # `zarr-metadata` refer to the same project.
        if re.sub(r"[-_.]+", "-", name).lower() == "zarr-metadata":
            # A version number can never contain `,`, `;`, or whitespace,
            # so stop at any of those. Stopping at `;` keeps a trailing
            # environment marker (`;python_version>=...`) out of the result.
            match = re.search(r">=\s*([^,;\s]+)", dep)
            if not match:
                raise SystemExit(
                    f"zarr-metadata dependency has no `>=` floor: {dep!r}\n"
                    "Floor verification requires an explicit lower bound."
                )
            return match.group(1)
    raise SystemExit(
        "zarr-metadata not found in [project.dependencies]. "
        "This script assumes zarr-python depends on zarr-metadata."
    )


def find_zarr_metadata_pin(deps: list[str]) -> str:
    """Return the ``==`` pin of the zarr-metadata requirement in `deps`.

    Parameters
    ----------
    deps:
        PEP 508 requirement strings, e.g. the value of
        ``[tool.hatch.envs.min_deps.extra-dependencies]``.

    Returns
    -------
    str
        The version after the ``==``, e.g. ``"0.1.1"`` for
        ``"zarr-metadata==0.1.1"``.

    Raises
    ------
    SystemExit
        If zarr-metadata is not listed in `deps`, or its specifier is
        not an ``==`` pin.
    """
    for dep in deps:
        # Name terminates at the first character PEP 508 disallows in a
        # project name (version operator, whitespace, `[`, `;`, `(`).
        name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip()
        # PEP 503 canonical comparison, matching find_zarr_metadata_floor.
        if re.sub(r"[-_.]+", "-", name).lower() == "zarr-metadata":
            # Stop at `,`, `;`, or whitespace: none can appear inside a
            # version, and `;` would start an environment marker.
            match = re.search(r"==\s*([^,;\s]+)", dep)
            if not match:
                raise SystemExit(
                    f"min_deps zarr-metadata entry is not an `==` pin: {dep!r}\n"
                    "The min_deps env must pin zarr-metadata exactly to the floor."
                )
            return match.group(1)
    raise SystemExit(
        "zarr-metadata not found in [tool.hatch.envs.min_deps.extra-dependencies].\n"
        "Add `'zarr-metadata==<floor>'` to keep min_deps testing the declared floor."
    )


def main() -> int:
    """Compare the [project.dependencies] floor with the min_deps pin.

    Reads pyproject.toml, extracts zarr-metadata's `>=` floor from the
    project dependencies and its `==` pin from the min_deps hatch env,
    and reports whether they agree.

    Returns
    -------
    int
        0 if the floor and the pin name the same version, 1 otherwise.

    Raises
    ------
    SystemExit
        With a descriptive message if either declaration is missing or
        malformed (raised by the find_* helpers or the table lookups).
    """
    # TOML is UTF-8 by specification; tomllib.load on a binary handle
    # decodes it correctly regardless of the locale, unlike read_text(),
    # which would use the platform's default encoding.
    with PYPROJECT.open("rb") as f:
        data = tomllib.load(f)

    # Turn missing-table KeyErrors into the same clear-message style the
    # find_* helpers use, instead of a raw traceback.
    try:
        project_deps = data["project"]["dependencies"]
    except KeyError:
        raise SystemExit(
            f"{PYPROJECT}: [project.dependencies] not found; cannot verify the floor."
        ) from None
    floor = find_zarr_metadata_floor(project_deps)

    try:
        min_deps_extra = data["tool"]["hatch"]["envs"]["min_deps"]["extra-dependencies"]
    except KeyError:
        raise SystemExit(
            f"{PYPROJECT}: [tool.hatch.envs.min_deps].extra-dependencies not found; "
            "cannot verify the min_deps pin."
        ) from None
    pin = find_zarr_metadata_pin(min_deps_extra)

    if floor != pin:
        print(
            f"floor / min_deps pin mismatch for zarr-metadata:\n"
            f"  [project.dependencies] floor:   >={floor}\n"
            f"  [tool.hatch.envs.min_deps] pin: =={pin}\n"
            f"\n"
            f"These must agree. Either update the floor in "
            f"[project.dependencies] or the pin in min_deps so both name "
            f"the same zarr-metadata version.",
            file=sys.stderr,
        )
        return 1

    print(f"OK: zarr-metadata floor {floor} matches min_deps pin {pin}.")
    return 0


if __name__ == "__main__":
raise SystemExit(main())
49 changes: 49 additions & 0 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,44 @@ To verify that your development environment is working, you can run the unit tes
hatch env run --env test.py3.12-optional run
```

#### The zarr-metadata package and the workspace

zarr-python depends on [`zarr-metadata`](https://pypi.org/project/zarr-metadata/), a small package of TypedDicts and literals describing the JSON shape of Zarr v2 and v3 metadata documents. Both packages live in this repository:

- zarr-python: the project root.
- zarr-metadata: [`packages/zarr-metadata/`](https://github.com/zarr-developers/zarr-python/tree/main/packages/zarr-metadata) — its own `pyproject.toml`, source tree, and tests.

This is configured as a workspace in two places, because the project supports both [`uv`](https://docs.astral.sh/uv/) and [`hatch`](https://hatch.pypa.io/) as front-ends.

**uv workspace declaration** (consumed by `uv sync`, `uv run`, and anything reading uv's project metadata):

```toml
[tool.uv.workspace]
members = ["packages/zarr-metadata"]

[tool.uv.sources]
zarr-metadata = { workspace = true }
```

**Hatch workspace declaration** (consumed by `hatch env run`, including the CI test matrix in `test.yml`):

```toml
[tool.hatch.envs.test]
workspace.members = ["packages/zarr-metadata"]
```

Both mechanisms point at the same in-tree path. They have to be declared separately because uv and hatch don't share configuration. The `dev` env, the `test` matrix, the inherited `gputest` and `upstream` envs all use the in-tree source. The `min_deps` env explicitly opts out (`workspace.members = []`) so it tests against the minimum supported zarr-metadata from PyPI — the floor of the version range in `[project.dependencies]`.

What this means in practice:

- **During local development** (whether you invoke `uv run pytest` or `hatch env run --env test.py3.12-optional run`), zarr-python resolves `zarr-metadata` from the in-tree source under `packages/zarr-metadata/`. Changes you make there are immediately visible to zarr-python without reinstalling.
- **In the published wheel**, only the `[project.dependencies]` version requirement (`zarr-metadata>=0.1.1,<0.2`) is carried. The workspace declarations are development-only configuration. Users installing zarr-python from PyPI get the published zarr-metadata wheel.
- **In CI**, the primary test matrix (`test.yml`) runs `hatch env run` against the in-tree zarr-metadata. A change in `packages/zarr-metadata/` that breaks zarr-python surfaces immediately, before zarr-metadata is released to PyPI. The `min_deps` job additionally exercises the published floor on every PR, so a change in zarr-python that *requires* an unreleased zarr-metadata feature also gets caught.

If you change zarr-metadata, also run zarr-python's test suite. The workspace setup makes this transparent — your usual `uv run pytest` or `hatch env run` picks up the in-tree source automatically.

When releasing a new zarr-metadata version that contains a breaking change, also bump zarr-python's version cap on zarr-metadata (currently `<0.2`) in the same release cycle. See [Releasing zarr-python when zarr-metadata has changed](#releasing-zarr-python-when-zarr-metadata-has-changed) below for the full procedure.

### Creating a branch

Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add.
Expand Down Expand Up @@ -349,6 +387,17 @@ Releases are prepared using the ["Prepare release notes"](https://github.com/zar
4. The release PR is automatically labeled `run-downstream`, which triggers the [downstream test workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/downstream.yml) to run Xarray and numcodecs integration tests against the release branch.
5. Review the rendered changelog in `docs/release-notes.md` and verify downstream tests pass before merging.

### Releasing zarr-python when zarr-metadata has changed

zarr-python depends on the [`zarr-metadata`](https://pypi.org/project/zarr-metadata/) package, which is developed in the same monorepo (see [The zarr-metadata package and the workspace](#the-zarr-metadata-package-and-the-workspace) above). When a zarr-python release depends on a zarr-metadata change that has not yet been published to PyPI, the release must follow this order:

1. **Bump zarr-metadata's version** in `packages/zarr-metadata/pyproject.toml` and `packages/zarr-metadata/src/zarr_metadata/__init__.py` (the version literal). Use semver: bump the minor for breaking type changes, the patch for additive changes.
2. **Release zarr-metadata to PyPI.** Tag and publish from `packages/zarr-metadata/`.
3. **Bump zarr-python's floor** on zarr-metadata in `[project.dependencies]` (e.g. `zarr-metadata>=0.1.1,<0.2` → `zarr-metadata>=0.2.0,<0.3`). Update `[tool.uv.workspace]` and `[tool.uv.sources]` only if necessary.
4. **Release zarr-python.**

If steps 1 and 2 are skipped (or step 3's bumped floor names a version that does not yet exist on PyPI), the `verify_pypi_dependency` job in [`releases.yml`](https://github.com/zarr-developers/zarr-python/blob/main/.github/workflows/releases.yml) will fail before the upload step runs. This gate exists because the wheel ships only a version-range requirement; pip resolves that against PyPI on the user's machine, and there is no built-in equivalent of `cargo publish`'s automatic check that the declared dependency is actually available in the registry.

## Benchmarks

Zarr uses [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/) for running
Expand Down
28 changes: 28 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
'google-crc32c>=1.5',
'typing_extensions>=4.13',
'donfig>=0.8',
'zarr-metadata>=0.1.1,<0.2',
]

dynamic = [
Expand Down Expand Up @@ -147,6 +148,15 @@ omit = [
"src/zarr/testing/conftest.py", # only for downstream projects
]

# When developing zarr-python locally, resolve zarr-metadata from the in-tree
# package under packages/zarr-metadata/. The `[project.dependencies]` version
# requirement is what propagates to consumers installing from PyPI.
[tool.uv.workspace]
members = ["packages/zarr-metadata"]

[tool.uv.sources]
zarr-metadata = { workspace = true }

[tool.hatch]
version.source = "vcs"

Expand All @@ -155,9 +165,18 @@ hooks.vcs.version-file = "src/zarr/_version.py"

[tool.hatch.envs.dev]
dependency-groups = ["dev"]
# Resolve zarr-metadata from the in-tree workspace member, not PyPI. See
# `[tool.uv.sources]` above for the equivalent for `uv run` invocations.
workspace.members = ["packages/zarr-metadata"]

[tool.hatch.envs.test]
dependency-groups = ["test"]
# Resolve zarr-metadata from the in-tree workspace member, not PyPI, so CI
# in `test.yml` exercises the integration between the two packages on every
# PR. Envs that inherit via `template = "test"` (gputest, upstream) pick
# this up automatically; min_deps overrides it (see below) to test against
# the published floor.
workspace.members = ["packages/zarr-metadata"]

[tool.hatch.envs.test.env-vars]

Expand Down Expand Up @@ -240,6 +259,11 @@ template = "test"
python = "3.12"
features = ["remote"]
dependency-groups = ["remote-tests"]
# Override the inherited workspace.members so this env tests against the
# minimum supported zarr-metadata from PyPI (the floor of the version range
# declared in [project.dependencies]) instead of the in-tree source. This
# keeps the "minimum supported deps" guarantee honest.
workspace.members = []
extra-dependencies = [
'packaging==22.*',
'numpy==2.0.*',
Expand All @@ -250,6 +274,10 @@ extra-dependencies = [
'typing_extensions==4.13.*',
'donfig==0.8.*',
'obstore==0.5.*',
# Pin to the floor of zarr-python's declared zarr-metadata range. Must
# match the >= bound in [project.dependencies] above; the
# `check_min_deps_floor.py` pre-commit hook enforces this invariant.
'zarr-metadata==0.1.1',
]

[tool.hatch.envs.defaults]
Expand Down
Loading
Loading