Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -351,11 +351,38 @@ private Type convertField(Schema.Field field, String schemaPath, IdentityHashMap
}

/**
 * Converts the given Parquet message type into an Avro {@link Schema}.
 *
 * <p>The conversion runs through {@code withDisabledNameValidation}, so Parquet field
 * names that Avro's name validation would reject (e.g. names containing hyphens) do
 * not make the conversion fail.
 *
 * @param parquetSchema the Parquet message type whose name and fields are converted
 * @return the Avro schema produced by {@code convertFields}
 */
public Schema convert(MessageType parquetSchema) {
  // A fresh name map is handed to convertFields for every top-level conversion.
  return withDisabledNameValidation(
      () -> convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>()));
}

/**
 * Converts the given Parquet group type into an Avro {@link Schema}.
 *
 * <p>The conversion runs through {@code withDisabledNameValidation}, so Parquet field
 * names that Avro's name validation would reject (e.g. names containing hyphens) do
 * not make the conversion fail.
 *
 * @param parquetSchema the Parquet group type whose name and fields are converted
 * @return the Avro schema produced by {@code convertFields}
 */
Schema convert(GroupType parquetSchema) {
  // A fresh name map is handed to convertFields for every top-level conversion.
  return withDisabledNameValidation(
      () -> convertFields(parquetSchema.getName(), parquetSchema.getFields(), new HashMap<>()));
}

/**
 * Temporarily disables Avro name validation so that Parquet field names
 * containing characters not allowed by Avro (e.g. hyphens) can be converted.
 * The Parquet spec allows any UTF-8 string as a field name.
 *
 * <p>The validation flag is read reflectively from the private static
 * {@code validateNames} field of {@link Schema}. If that field cannot be located,
 * cannot be made accessible, or does not hold a {@code ThreadLocal} with a
 * {@code Boolean} (or unset) value, the supplier is run with whatever validation
 * behavior is currently in effect. The previous flag value is always restored,
 * even when the supplier throws.
 *
 * @param supplier the conversion to run while validation is disabled
 * @return the schema produced by {@code supplier}
 */
private static Schema withDisabledNameValidation(java.util.function.Supplier<Schema> supplier) {
  ThreadLocal<Object> validateNames = lookupValidateNamesFlag();
  if (validateNames == null) {
    // If reflection fails, fall back to default behavior
    return supplier.get();
  }
  Object prev = validateNames.get();
  if (prev != null && !(prev instanceof Boolean)) {
    // The flag is not a Boolean in this Avro version; writing Boolean.FALSE into it
    // would corrupt Avro's own state, so leave it untouched.
    return supplier.get();
  }
  try {
    validateNames.set(Boolean.FALSE);
    return supplier.get();
  } finally {
    // Restore exactly what was there before, so callers on this thread are unaffected.
    validateNames.set(prev);
  }
}

/**
 * Looks up Avro's private static {@code validateNames} thread-local via reflection.
 *
 * @return the thread-local, or {@code null} if it is missing, inaccessible, or not a
 *     {@code ThreadLocal}
 */
@SuppressWarnings("unchecked")
private static ThreadLocal<Object> lookupValidateNamesFlag() {
  try {
    java.lang.reflect.Field f = Schema.class.getDeclaredField("validateNames");
    f.setAccessible(true);
    Object value = f.get(null);
    if (value instanceof ThreadLocal) {
      // Safe: the element type is re-checked by the caller before it is used.
      return (ThreadLocal<Object>) value;
    }
    return null;
  } catch (ReflectiveOperationException | RuntimeException e) {
    // RuntimeException covers SecurityException / InaccessibleObjectException from
    // setAccessible and the NullPointerException Field.get(null) throws for a
    // non-static field; none of these should abort the schema conversion.
    return null;
  }
}

private Schema convertFields(String name, List<Type> parquetFields, Map<String, Integer> names) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,18 @@ public void testDeeplyNestedNonRecursiveSchema() {
Assert.assertEquals("Root schema name should be preserved", "Root", result.getName());
}

/**
 * PARQUET-3364: Parquet spec allows any UTF-8 string as a field name, so a schema
 * whose column names contain hyphens must convert and keep those names.
 */
@Test
public void testHyphenatedColumnName() {
  final String schemaText =
      "message test {\n required binary Creation-Time (UTF8);\n optional int32 my-count;\n}\n";
  final MessageType hyphenated = MessageTypeParser.parseMessageType(schemaText);

  final Schema converted = new AvroSchemaConverter().convert(hyphenated);

  Assert.assertNotNull("Schema with hyphenated field names should convert", converted);
  // Both hyphenated columns must be present under their original names.
  for (String fieldName : new String[] {"Creation-Time", "my-count"}) {
    Assert.assertNotNull(converted.getField(fieldName));
  }
}

/**
 * Builds a two-branch union schema: a NULL schema first, then {@code original}.
 *
 * @param original the schema to place after the NULL branch
 * @return the union of NULL and {@code original}
 */
public static Schema optional(Schema original) {
  Schema nullBranch = Schema.create(Schema.Type.NULL);
  return Schema.createUnion(Lists.newArrayList(nullBranch, original));
}
Expand Down