diff --git a/CHANGELOG.md b/CHANGELOG.md index cecad0bd..01d4cdfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed +- **Rust dependency upgrades**: bumped `rand` 0.8 → 0.10 and `rand_xoshiro` 0.6 → 0.8 in the Rust backend (the two crates are coupled through `rand_core` and must move together). MSRV bumped from Rust 1.84 → 1.85 to satisfy the new dependency requirements. Three call sites in `rust/src/bootstrap.rs` updated for the `rand 0.9` API rename: `gen::<bool>()` → `random::<bool>()`, `gen::<f64>()` → `random::<f64>()`, `gen_range(0..6)` → `random_range(0..6)`. **Webb wild bootstrap byte stream shifted** as a side effect: `rand 0.9` reworked the internal algorithm for `random_range` (improved rejection sampling), so `Xoshiro256PlusPlus::seed_from_u64(seed)` followed by `random_range(0..6)` consumes RNG bytes differently than the old `gen_range(0..6)` did. Distributional properties of Webb weights are unchanged (still uniform over the 6-point support); aggregate inference (SE, p-values, CI) converges to the same values for any reasonable `n_bootstrap`. Rademacher and Mammen byte streams are bit-identical to the prior release. Anyone with a saved Rust+Webb baseline pinning specific seeded results will see different numbers; the regression test suite uses within-build seed-reproducibility (not cross-version baselines) so all internal tests pass unchanged. New regression guard `TestRustBackend::test_bootstrap_weights_bit_identity_snapshot` pins fixed-seed weights for all three weight types, so any future RNG drift fails loudly with a localized error message. 
+ ## [3.3.1] - 2026-04-25 ### Changed diff --git a/rust/Cargo.toml b/rust/Cargo.toml index b2878c35..40b7e33a 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -2,7 +2,7 @@ name = "diff_diff_rust" version = "3.3.1" edition = "2021" -rust-version = "1.84" +rust-version = "1.85" description = "Rust backend for diff-diff DiD library" license = "MIT" @@ -25,8 +25,8 @@ openblas = ["ndarray/blas"] pyo3 = "0.28" numpy = "0.28" ndarray = { version = "0.17", features = ["rayon"] } -rand = "0.8" -rand_xoshiro = "0.6" +rand = "0.10" +rand_xoshiro = "0.8" rayon = "1.8" # Pure Rust linear algebra for SVD/matrix inversion (no external deps). diff --git a/rust/src/bootstrap.rs b/rust/src/bootstrap.rs index 71d5093f..f0733f1d 100644 --- a/rust/src/bootstrap.rs +++ b/rust/src/bootstrap.rs @@ -67,7 +67,7 @@ fn generate_rademacher_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> A .for_each(|(i, mut row)| { let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64)); for elem in row.iter_mut() { - *elem = if rng.gen::() { 1.0 } else { -1.0 }; + *elem = if rng.random::() { 1.0 } else { -1.0 }; } }); @@ -102,7 +102,7 @@ fn generate_mammen_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array .for_each(|(i, mut row)| { let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64)); for elem in row.iter_mut() { - *elem = if rng.gen::() < prob_neg { + *elem = if rng.random::() < prob_neg { val_neg } else { val_pos @@ -142,7 +142,7 @@ fn generate_webb_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array2< let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64)); for elem in row.iter_mut() { // Uniform selection: generate integer 0-5, index into weights_table - let bucket = rng.gen_range(0..6); + let bucket = rng.random_range(0..6); *elem = weights_table[bucket]; } }); diff --git a/tests/test_rust_backend.py b/tests/test_rust_backend.py index 48d1f0e5..1aa64b20 100644 --- a/tests/test_rust_backend.py +++ 
b/tests/test_rust_backend.py @@ -100,6 +100,57 @@ def test_bootstrap_different_seeds(self): weights2 = generate_bootstrap_weights_batch(100, 50, "rademacher", 43) assert not np.array_equal(weights1, weights2) + def test_bootstrap_weights_bit_identity_snapshot(self): + """Pin fixed-seed bootstrap weight output byte-for-byte. + + Regression guard against silent RNG output drift across + `rand` / `rand_xoshiro` crate upgrades. Distributional moment + tests would not catch a byte shift that preserves the + distribution (e.g. `rand 0.9`'s `random_range` algorithm + change relative to `rand 0.8`'s `gen_range`). + + If this test fails after a Rust dependency bump, the byte stream + has shifted. Decide deliberately whether to accept the new + baseline (regenerate these values) or pin to a compatible + crate version. + """ + from diff_diff._rust_backend import generate_bootstrap_weights_batch + + # Captured under rand 0.10 + rand_xoshiro 0.8 with seed=42. + # Rademacher and Mammen bytes match rand 0.8 + rand_xoshiro 0.6; + # Webb bytes shifted in the rand 0.9 random_range algorithm change. + expected = { + "rademacher": np.array( + [ + [1.0, -1.0, 1.0, 1.0], + [-1.0, 1.0, 1.0, 1.0], + ] + ), + "mammen": np.array( + [ + [1.618033988749895, -0.6180339887498949, 1.618033988749895, -0.6180339887498949], + [-0.6180339887498949, -0.6180339887498949, 1.618033988749895, 1.618033988749895], + ] + ), + "webb": np.array( + [ + [1.0, -1.0, 1.224744871391589, 1.0], + [-1.0, 0.7071067811865476, 1.224744871391589, 1.224744871391589], + ] + ), + } + for weight_type, expected_arr in expected.items(): + actual = generate_bootstrap_weights_batch(2, 4, weight_type, 42) + # Strict bit-identity: the snapshot values are either exact + # (Rademacher = +/-1.0) or computed once via correctly-rounded + # IEEE 754 sqrt in Rust (Mammen, Webb), so cross-platform + # bit-equality holds on conformant hardware. 
+ np.testing.assert_array_equal( + actual, + expected_arr, + err_msg=f"{weight_type} bootstrap weights drifted from pinned baseline", + ) + # ========================================================================= # Synthetic Weight Tests # =========================================================================