From 392e06b45839999ade3187f9cd80822b3ad137a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Rou=C3=A9l?= Date: Sun, 19 Apr 2026 17:13:36 +0200 Subject: [PATCH] GH-3497: Fix thread-unsafe static singleton in `DefaultValuesWriterFactory` causing use-after-free `DefaultValuesWriterFactory` delegates to static singleton instances of `DefaultV1ValuesWriterFactory` and `DefaultV2ValuesWriterFactory`. These singletons store a mutable `parquetProperties` reference via `initialize()`, which gets overwritten by whichever `ParquetProperties` instance calls it last. When multiple Parquet writers run concurrently in the same JVM, the merger's column writers end up using the appender's `ByteBufferAllocator`. When the appender is closed and its allocator released, the merger crashes with `IllegalStateException: Already closed` on direct ByteBuffer access because its encoder slabs were allocated from a now-closed arena. Replace the static final `DEFAULT_V1_WRITER_FACTORY` and `DEFAULT_V2_WRITER_FACTORY` singletons in `DefaultValuesWriterFactory` with fresh instances created per `initialize()` call. This ensures each `ParquetProperties` instance (and its allocator) is isolated to its own factory, eliminating cross-contamination between concurrent writers sharing the same JVM. --- .../factory/DefaultValuesWriterFactory.java | 7 +-- .../DefaultValuesWriterFactoryTest.java | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactory.java b/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactory.java index 3759cfe86c..4c03e6b65e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactory.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactory.java @@ -33,15 +33,12 @@ public class DefaultValuesWriterFactory implements ValuesWriterFactory { private ValuesWriterFactory delegateFactory; - private static final ValuesWriterFactory DEFAULT_V1_WRITER_FACTORY = new DefaultV1ValuesWriterFactory(); - private static final ValuesWriterFactory DEFAULT_V2_WRITER_FACTORY = new DefaultV2ValuesWriterFactory(); - @Override public void initialize(ParquetProperties properties) { if (properties.getWriterVersion() == WriterVersion.PARQUET_1_0) { - delegateFactory = DEFAULT_V1_WRITER_FACTORY; + delegateFactory = new DefaultV1ValuesWriterFactory(); } else { - delegateFactory = DEFAULT_V2_WRITER_FACTORY; + delegateFactory = new DefaultV2ValuesWriterFactory(); } delegateFactory.initialize(properties); diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactoryTest.java b/parquet-column/src/test/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactoryTest.java index 37fca55ef6..17f786c4a5 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactoryTest.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/values/factory/DefaultValuesWriterFactoryTest.java @@ -23,8 +23,12 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; import static org.apache.parquet.schema.Types.required; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; +import java.lang.reflect.Field; +import org.apache.parquet.bytes.ByteBufferAllocator; +import org.apache.parquet.bytes.HeapByteBufferAllocator; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.column.ParquetProperties.WriterVersion; @@ -807,4 +811,51 @@ private void validateFallbackWriter( validateWriterType(wr.initialWriter, initialWriterClass); validateWriterType(wr.fallBackWriter, fallbackWriterClass); } + + /** + * Verifies that two independently built ParquetProperties instances produce ValuesWriters + * that use their own respective allocators, not a shared/stale reference from a static singleton. + */ + @Test + public void testFactoryIsolation_eachPropertiesUsesOwnAllocator() throws Exception { + ByteBufferAllocator allocatorA = new HeapByteBufferAllocator(); + ByteBufferAllocator allocatorB = new HeapByteBufferAllocator(); + + ParquetProperties propsA = ParquetProperties.builder() + .withWriterVersion(WriterVersion.PARQUET_2_0) + .withAllocator(allocatorA) + .build(); + + ParquetProperties propsB = ParquetProperties.builder() + .withWriterVersion(WriterVersion.PARQUET_2_0) + .withAllocator(allocatorB) + .build(); + + ColumnDescriptor col = createColumnDescriptor(PrimitiveTypeName.INT32); + + // Create a writer from propsA's factory + ValuesWriter writerA = propsA.getValuesWriterFactory().newValuesWriter(col); + // Then create a writer from propsB's factory (this used to overwrite the static singleton) + ValuesWriter writerB = propsB.getValuesWriterFactory().newValuesWriter(col); + // Now create another writer from propsA's factory + ValuesWriter writerA2 = propsA.getValuesWriterFactory().newValuesWriter(col); + + // All writers from propsA should use allocatorA + assertSame("writerA should use allocatorA", allocatorA, getDictionaryWriterAllocator(writerA)); + assertSame( + "writerA2 should use allocatorA (not allocatorB from later initialization)", + allocatorA, + getDictionaryWriterAllocator(writerA2)); + + // Writers from propsB should use allocatorB + assertSame("writerB should use allocatorB", allocatorB, getDictionaryWriterAllocator(writerB)); + } + + private static ByteBufferAllocator getDictionaryWriterAllocator(ValuesWriter writer) throws Exception { + FallbackValuesWriter fallback = (FallbackValuesWriter) writer; + DictionaryValuesWriter dictWriter = (DictionaryValuesWriter) fallback.initialWriter; + Field allocatorField = DictionaryValuesWriter.class.getDeclaredField("allocator"); + allocatorField.setAccessible(true); + return (ByteBufferAllocator) allocatorField.get(dictWriter); + } }