Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vortex-duckdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true }
url = { workspace = true }
vortex = { workspace = true, features = ["files", "tokio", "object_store"] }
vortex-parquet-variant = { workspace = true }
vortex-utils = { workspace = true, features = ["dashmap"] }

[dev-dependencies]
Expand Down
27 changes: 24 additions & 3 deletions vortex-duckdb/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const DEFAULT_DUCKDB_VERSION: &str = "1.5.3";

const BUILD_ARTIFACTS: [&str; 3] = ["libduckdb.dylib", "libduckdb.so", "libduckdb_static.a"];

const SOURCE_FILES: [&str; 17] = [
const SOURCE_FILES: [&str; 18] = [
"cpp/client_context.cpp",
"cpp/config.cpp",
"cpp/copy_function.cpp",
Expand All @@ -39,6 +39,7 @@ const SOURCE_FILES: [&str; 17] = [
"cpp/table_filter.cpp",
"cpp/table_function.cpp",
"cpp/value.cpp",
"cpp/variant.cpp",
"cpp/vector.cpp",
"cpp/vector_buffer.cpp",
];
Expand Down Expand Up @@ -349,12 +350,22 @@ fn c2rust(crate_dir: &Path, duckdb_include_dir: &Path) {
}
}

fn cpp(duckdb_include_dir: &Path) {
fn cpp(
duckdb_include_dir: &Path,
duckdb_parquet_include_dir: &Path,
duckdb_parquet_third_party_dir: &Path,
duckdb_thrift_include_dir: &Path,
duckdb_yyjson_include_dir: &Path,
) {
cc::Build::new()
.std("c++20")
.flags(["-Wall", "-Wextra", "-Wpedantic"])
.cpp(true)
.include(duckdb_include_dir)
.include(duckdb_parquet_include_dir)
.include(duckdb_parquet_third_party_dir)
.include(duckdb_thrift_include_dir)
.include(duckdb_yyjson_include_dir)
.include("cpp/include")
.files(SOURCE_FILES)
.compile("vortex-duckdb-extras");
Expand Down Expand Up @@ -470,6 +481,16 @@ fn main() {
let duckdb_include_dir = inner_dir.join("src").join("include");
println!("cargo:rerun-if-changed=cpp/include");
c2rust(&crate_dir, &duckdb_include_dir);
cpp(&duckdb_include_dir);
let duckdb_parquet_include_dir = inner_dir.join("extension").join("parquet").join("include");
let duckdb_parquet_third_party_dir = inner_dir.join("third_party").join("parquet");
let duckdb_thrift_include_dir = inner_dir.join("third_party").join("thrift");
let duckdb_yyjson_include_dir = inner_dir.join("third_party").join("yyjson").join("include");
cpp(
&duckdb_include_dir,
&duckdb_parquet_include_dir,
&duckdb_parquet_third_party_dir,
&duckdb_thrift_include_dir,
&duckdb_yyjson_include_dir,
);
rust2c(&crate_dir);
}
1 change: 1 addition & 0 deletions vortex-duckdb/cpp/include/duckdb_vx.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
#include "duckdb_vx/table_filter.h"
#include "duckdb_vx/table_function.h"
#include "duckdb_vx/value.h"
#include "duckdb_vx/variant.h"
#include "duckdb_vx/vector.h"
#include "duckdb_vx/vector_buffer.h"
1 change: 1 addition & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/logical_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ extern "C" {

char *duckdb_vx_logical_type_stringify(duckdb_logical_type ty);
duckdb_logical_type duckdb_vx_logical_type_copy(duckdb_logical_type ty);
duckdb_logical_type duckdb_vx_logical_type_variant(void);

#ifdef __cplusplus /* End C ABI */
}
Expand Down
4 changes: 4 additions & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ extern "C" {
// Create a null value with a reference to a logical type.
duckdb_value duckdb_vx_value_create_null(duckdb_logical_type ty);

// Clone a DuckDB value.
// The returned value must be freed with duckdb_destroy_value.
duckdb_value duckdb_vx_value_clone(duckdb_value value);

#ifdef __cplusplus /* End C ABI */
}
#endif
30 changes: 30 additions & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/variant.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#pragma once

#include "duckdb_vx/duckdb_diagnostics.h"

DUCKDB_INCLUDES_BEGIN
#include "duckdb.h"
DUCKDB_INCLUDES_END

#include "duckdb_vx/error.h"

#ifdef __cplusplus /* If compiled as C++, use C ABI */
extern "C" {
#endif

duckdb_vector duckdb_vx_variant_to_parquet(duckdb_vector variant, idx_t len, duckdb_vx_error *err);

void duckdb_vx_variant_from_parquet(duckdb_vector metadata,
duckdb_vector value,
duckdb_vector typed_value,
bool has_typed_value,
duckdb_vector out,
idx_t len,
duckdb_vx_error *err);

#ifdef __cplusplus /* End C ABI */
}
#endif
5 changes: 5 additions & 0 deletions vortex-duckdb/cpp/logical_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ duckdb_logical_type duckdb_vx_logical_type_copy(duckdb_logical_type ty) {
return reinterpret_cast<duckdb_logical_type>(copy.release());
}

duckdb_logical_type duckdb_vx_logical_type_variant(void) {
auto type = duckdb::make_uniq<duckdb::LogicalType>(duckdb::LogicalType::VARIANT());
return reinterpret_cast<duckdb_logical_type>(type.release());
}

char *duckdb_vx_logical_type_stringify(duckdb_logical_type c_type) {
auto type = reinterpret_cast<duckdb::LogicalType *>(c_type);
auto str = type->ToString();
Expand Down
9 changes: 9 additions & 0 deletions vortex-duckdb/cpp/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,12 @@ extern "C" duckdb_value duckdb_vx_value_create_null(duckdb_logical_type ty) {
auto value = duckdb::make_uniq<duckdb::Value>(*logical_type);
return reinterpret_cast<duckdb_value>(value.release());
}

extern "C" duckdb_value duckdb_vx_value_clone(duckdb_value value) {
if (!value) {
return nullptr;
}

const auto ddb_value = reinterpret_cast<duckdb::Value *>(value);
return reinterpret_cast<duckdb_value>(new duckdb::Value(*ddb_value));
}
111 changes: 111 additions & 0 deletions vortex-duckdb/cpp/variant.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "duckdb_vx/variant.h"
#include "duckdb_vx/error.hpp"

DUCKDB_INCLUDES_BEGIN
#include "duckdb/common/types.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/common/types/variant_value.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/execution/expression_executor_state.hpp"
#include "duckdb/function/scalar_function.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "reader/variant/variant_shredded_conversion.hpp"
#include "writer/variant_column_writer.hpp"
DUCKDB_INCLUDES_END

#include <exception>

using namespace duckdb;

namespace {

duckdb::LogicalType UnshreddedParquetVariantType() {
duckdb::child_list_t<duckdb::LogicalType> children;
children.emplace_back("metadata", duckdb::LogicalType::BLOB);
children.emplace_back("value", duckdb::LogicalType::BLOB);
auto type = duckdb::LogicalType::STRUCT(std::move(children));
type.SetAlias("PARQUET_VARIANT");
return type;
}

duckdb::LogicalType ParquetVariantGroupType(Vector &value, optional_ptr<Vector> typed_value) {
duckdb::child_list_t<duckdb::LogicalType> children;
children.emplace_back("value", value.GetType());
if (typed_value) {
children.emplace_back("typed_value", typed_value->GetType());
}
return duckdb::LogicalType::STRUCT(std::move(children));
}

void SetException(duckdb_vx_error *err, const std::exception &e) {
vortex::SetError(err, e.what());
}

void SetUnknownException(duckdb_vx_error *err) {
vortex::SetError(err, "unknown DuckDB Variant conversion error");
}

} // namespace

extern "C" duckdb_vector duckdb_vx_variant_to_parquet(duckdb_vector variant, idx_t len, duckdb_vx_error *err) {
try {
auto &variant_vector = *reinterpret_cast<Vector *>(variant);
auto result_type = UnshreddedParquetVariantType();
auto result = make_uniq<Vector>(result_type, len);

DataChunk input;
input.Initialize(Allocator::DefaultAllocator(), {duckdb::LogicalType::VARIANT()}, len);
input.SetCardinality(len);
input.data[0].Reference(variant_vector);

auto transform = VariantColumnWriter::GetTransformFunction();
ExpressionExecutorState root;
BoundFunctionExpression expr(result_type, transform, {}, nullptr);
ExpressionState state(expr, root);
transform.function(input, state, *result);

*err = nullptr;
return reinterpret_cast<duckdb_vector>(result.release());
} catch (const std::exception &e) {
SetException(err, e);
} catch (...) {
SetUnknownException(err);
}
return nullptr;
}

extern "C" void duckdb_vx_variant_from_parquet(duckdb_vector metadata,
duckdb_vector value,
duckdb_vector typed_value,
bool has_typed_value,
duckdb_vector out,
idx_t len,
duckdb_vx_error *err) {
try {
auto &metadata_vector = *reinterpret_cast<Vector *>(metadata);
auto &value_vector = *reinterpret_cast<Vector *>(value);
optional_ptr<Vector> typed_value_vector;
if (has_typed_value) {
typed_value_vector = reinterpret_cast<Vector *>(typed_value);
}

Vector group(ParquetVariantGroupType(value_vector, typed_value_vector), len);
auto &entries = StructVector::GetEntries(group);
entries[0]->Reference(value_vector);
if (typed_value_vector) {
entries[1]->Reference(*typed_value_vector);
}

auto values = VariantShreddedConversion::Convert(metadata_vector, group, 0, len, len);
auto &out_vector = *reinterpret_cast<Vector *>(out);
VariantValue::ToVARIANT(values, out_vector);
*err = nullptr;
} catch (const std::exception &e) {
SetException(err, e);
} catch (...) {
SetUnknownException(err);
}
}
15 changes: 12 additions & 3 deletions vortex-duckdb/src/convert/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
//! | `Date` | `DATE` |
//! | `Time` | `TIME` |
//! | `Timestamp` | `TIMESTAMP` |
//! | `Variant` | `VARIANT` |

use std::ffi::CString;
use std::sync::Arc;
Expand Down Expand Up @@ -239,9 +240,7 @@ impl TryFrom<&DType> for LogicalType {
return LogicalType::try_from(struct_type);
}
DType::Union(..) => todo!("TODO(connor)[Union]: unimplemented"),
DType::Variant(_) => {
vortex_bail!("Vortex Variant array aren't supported in DuckDB")
}
DType::Variant(_) => return Ok(LogicalType::variant()),
DType::Extension(ext_dtype) => {
let Some(temporal) = ext_dtype.metadata_opt::<AnyTemporal>() else {
vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id());
Expand Down Expand Up @@ -422,6 +421,16 @@ mod tests {
);
}

#[test]
fn test_variant_type() {
let dtype = DType::Variant(Nullability::Nullable);
let logical_type = LogicalType::try_from(&dtype).unwrap();
assert_eq!(
logical_type.as_type_id(),
cpp::DUCKDB_TYPE::DUCKDB_TYPE_VARIANT
);
}

#[test]
fn test_struct_type() {
let field_names = FieldNames::from([FieldName::from("field1"), FieldName::from("field2")]);
Expand Down
3 changes: 3 additions & 0 deletions vortex-duckdb/src/convert/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,9 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar {
vortex_bail!("List value must be a list or struct dtype")
}
},
ExtractedValue::Variant(_) => {
vortex_bail!("DuckDB Variant scalars aren't supported in Vortex")
}
}
}
}
Expand Down
Loading
Loading