diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 782c009db4..7907ad0a52 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -351,11 +351,52 @@ private Type convertField(Schema.Field field, String schemaPath, IdentityHashMap
   }
 
   public Schema convert(MessageType parquetSchema) {
-    return convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>());
+    return withDisabledNameValidation(
+        () -> convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>()));
   }
 
   Schema convert(GroupType parquetSchema) {
-    return convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>());
+    return withDisabledNameValidation(
+        () -> convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>()));
+  }
+
+  /**
+   * Runs {@code supplier} with Avro name validation temporarily disabled so that
+   * Parquet field names containing characters not allowed by Avro (e.g. hyphens)
+   * can be converted. The Parquet spec allows any UTF-8 string as a field name.
+   *
+   * <p>Validation is switched off by reflectively setting the static
+   * {@code validateNames} thread-local of {@link Schema} to {@code false}; the
+   * previous value is restored before returning, even if {@code supplier} throws.
+   * If the field cannot be reached, or does not hold a {@code Boolean}, the
+   * supplier runs with validation left untouched.
+   *
+   * @param supplier the schema conversion to run
+   * @return the schema produced by {@code supplier}
+   */
+  @SuppressWarnings("unchecked")
+  private static Schema withDisabledNameValidation(java.util.function.Supplier<Schema> supplier) {
+    ThreadLocal<Object> validateNames;
+    try {
+      java.lang.reflect.Field f = Schema.class.getDeclaredField("validateNames");
+      f.setAccessible(true);
+      validateNames = (ThreadLocal<Object>) f.get(null);
+    } catch (ReflectiveOperationException | RuntimeException e) {
+      // Missing field, access refused by setAccessible, or not a ThreadLocal:
+      // fall back to default behavior.
+      return supplier.get();
+    }
+    Object prev = validateNames == null ? null : validateNames.get();
+    if (!(prev instanceof Boolean)) {
+      // Not a Boolean flag, so writing FALSE could corrupt it; leave it alone.
+      return supplier.get();
+    }
+    try {
+      validateNames.set(Boolean.FALSE);
+      return supplier.get();
+    } finally {
+      validateNames.set(prev);
+    }
   }
 
   private Schema convertFields(String name, List parquetFields, Map names) {
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
index 412e8f2957..a5231ce657 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
@@ -1073,6 +1073,18 @@ public void testDeeplyNestedNonRecursiveSchema() {
     Assert.assertEquals("Root schema name should be preserved", "Root", result.getName());
   }
 
+  @Test
+  public void testHyphenatedColumnName() {
+    // PARQUET-3364: Parquet spec allows any UTF-8 string as a field name
+    MessageType parquetSchema = MessageTypeParser.parseMessageType(
+        "message test {\n required binary Creation-Time (UTF8);\n optional int32 my-count;\n}\n");
+    AvroSchemaConverter converter = new AvroSchemaConverter();
+    Schema avroSchema = converter.convert(parquetSchema);
+    Assert.assertNotNull("Schema with hyphenated field names should convert", avroSchema);
+    Assert.assertNotNull(avroSchema.getField("Creation-Time"));
+    Assert.assertNotNull(avroSchema.getField("my-count"));
+  }
+
   public static Schema optional(Schema original) {
     return Schema.createUnion(Lists.newArrayList(Schema.create(Schema.Type.NULL), original));
   }