-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Format] Add Opaque canonical extension type #41823
Closed
Closed
Changes from all commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
5d52362
WIP: [Format] Add Other canonical extension type
lidavidm d3cd22f
Feedback
lidavidm b486b36
Update example
lidavidm 194bec6
revise
lidavidm 811590b
Revise
lidavidm f8e0905
Revise (2)
lidavidm 767cbf5
Implement Java extension type
lidavidm b9e1a43
Start C++ side
lidavidm 2604f20
Rename to Opaque
lidavidm 8a7cafb
Rename to Opaque
lidavidm e26afe0
C++ impl sans tests
lidavidm aace850
Some tests for C++
lidavidm 469c9a4
Finish up C++ tests
lidavidm 1456a3b
Add Python impl
lidavidm 978a624
Add Python tests
lidavidm 1bd72f3
Fix checkstyle
lidavidm 16a200b
Fix CI
lidavidm 95f5c0e
Add Python tests
lidavidm 4bd1b86
add Go implementation
zeroshade 8f2322b
Bikeshed
lidavidm 1462b7c
Update
lidavidm fc7007a
Fix name
lidavidm 8d7a1e1
Fix name
lidavidm 90caa77
Update docs/source/format/CanonicalExtensions.rst
lidavidm a373962
Merge remote-tracking branch 'upstream/main' into ext-other
lidavidm db87ddb
fix go
lidavidm 3bc9234
Apply suggestions from code review
lidavidm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "arrow/extension/opaque.h" | ||
|
||
#include <sstream> | ||
|
||
#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep | ||
#include "arrow/util/logging.h" | ||
|
||
#include <rapidjson/document.h> | ||
#include <rapidjson/error/en.h> | ||
#include <rapidjson/writer.h> | ||
|
||
namespace arrow::extension { | ||
|
||
std::string OpaqueType::ToString(bool show_metadata) const { | ||
std::stringstream ss; | ||
ss << "extension<" << this->extension_name() | ||
<< "[storage_type=" << storage_type_->ToString() << ", type_name=" << type_name_ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
<< ", vendor_name=" << vendor_name_ << "]>"; | ||
return ss.str(); | ||
} | ||
|
||
bool OpaqueType::ExtensionEquals(const ExtensionType& other) const { | ||
if (extension_name() != other.extension_name()) { | ||
return false; | ||
} | ||
const auto& opaque = internal::checked_cast<const OpaqueType&>(other); | ||
return storage_type()->Equals(*opaque.storage_type()) && | ||
type_name() == opaque.type_name() && vendor_name() == opaque.vendor_name(); | ||
} | ||
|
||
std::string OpaqueType::Serialize() const { | ||
rapidjson::Document document; | ||
document.SetObject(); | ||
rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); | ||
|
||
rapidjson::Value type_name(rapidjson::StringRef(type_name_)); | ||
document.AddMember(rapidjson::Value("type_name", allocator), type_name, allocator); | ||
rapidjson::Value vendor_name(rapidjson::StringRef(vendor_name_)); | ||
document.AddMember(rapidjson::Value("vendor_name", allocator), vendor_name, allocator); | ||
|
||
rapidjson::StringBuffer buffer; | ||
rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); | ||
document.Accept(writer); | ||
return buffer.GetString(); | ||
} | ||
|
||
// Reconstruct an OpaqueType from its storage type and serialized metadata.
//
// serialized_data must be a JSON object containing string members
// "type_name" and "vendor_name". Status::Invalid is returned, checked in this
// order, when: the input is not valid JSON, the top-level value is not an
// object, "type_name" is missing, "vendor_name" is missing, "type_name" is not
// a string, or "vendor_name" is not a string.
Result<std::shared_ptr<DataType>> OpaqueType::Deserialize(
    std::shared_ptr<DataType> storage_type, const std::string& serialized_data) const {
  rapidjson::Document document;
  const auto& parsed = document.Parse(serialized_data.data(), serialized_data.length());
  if (parsed.HasParseError()) {
    return Status::Invalid("Invalid serialized JSON data for OpaqueType: ",
                           rapidjson::GetParseError_En(parsed.GetParseError()), ": ",
                           serialized_data);
  }
  if (!document.IsObject()) {
    return Status::Invalid("Invalid serialized JSON data for OpaqueType: not an object");
  }

  // Look each member up once instead of HasMember() followed by operator[].
  const auto type_name_it = document.FindMember("type_name");
  if (type_name_it == document.MemberEnd()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: missing type_name");
  }
  const auto vendor_name_it = document.FindMember("vendor_name");
  if (vendor_name_it == document.MemberEnd()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: missing vendor_name");
  }

  const auto& type_name = type_name_it->value;
  const auto& vendor_name = vendor_name_it->value;
  if (!type_name.IsString()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: type_name is not a string");
  }
  if (!vendor_name.IsString()) {
    return Status::Invalid(
        "Invalid serialized JSON data for OpaqueType: vendor_name is not a string");
  }

  // Build the strings from pointer + length: JSON strings may contain U+0000,
  // and constructing from a bare const char* would truncate at the first NUL,
  // breaking the round trip with Serialize().
  return opaque(std::move(storage_type),
                std::string(type_name.GetString(), type_name.GetStringLength()),
                std::string(vendor_name.GetString(), vendor_name.GetStringLength()));
}
|
||
std::shared_ptr<Array> OpaqueType::MakeArray(std::shared_ptr<ArrayData> data) const { | ||
DCHECK_EQ(data->type->id(), Type::EXTENSION); | ||
DCHECK_EQ("arrow.opaque", | ||
internal::checked_cast<const ExtensionType&>(*data->type).extension_name()); | ||
return std::make_shared<OpaqueArray>(data); | ||
} | ||
|
||
std::shared_ptr<DataType> opaque(std::shared_ptr<DataType> storage_type, | ||
std::string type_name, std::string vendor_name) { | ||
return std::make_shared<OpaqueType>(std::move(storage_type), std::move(type_name), | ||
std::move(vendor_name)); | ||
} | ||
|
||
} // namespace arrow::extension |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "arrow/extension_type.h" | ||
#include "arrow/type.h" | ||
|
||
namespace arrow::extension { | ||
|
||
/// \brief Opaque is a placeholder for a type from an external (usually
/// non-Arrow) system that could not be interpreted.
///
/// Besides the storage type, an instance carries two strings: the name of
/// the type in the external system and the name of that system.
class ARROW_EXPORT OpaqueType : public ExtensionType {
 public:
  /// \brief Construct an OpaqueType.
  ///
  /// \param[in] storage_type The underlying storage type. Should be
  ///   arrow::null if there is no data.
  /// \param[in] type_name The name of the type in the external system.
  /// \param[in] vendor_name The name of the external system.
  explicit OpaqueType(std::shared_ptr<DataType> storage_type, std::string type_name,
                      std::string vendor_name)
      : ExtensionType(std::move(storage_type)),
        type_name_(std::move(type_name)),
        vendor_name_(std::move(vendor_name)) {}

  /// \brief The extension name of this type; always "arrow.opaque".
  std::string extension_name() const override { return "arrow.opaque"; }

  /// \brief Human-readable description of the type.
  std::string ToString(bool show_metadata) const override;

  /// \brief Whether this type equals another extension type.
  bool ExtensionEquals(const ExtensionType& other) const override;

  /// \brief Serialize the type parameters to a string.
  std::string Serialize() const override;

  /// \brief Rebuild a type from a storage type and serialized parameters.
  Result<std::shared_ptr<DataType>> Deserialize(
      std::shared_ptr<DataType> storage_type,
      const std::string& serialized_data) const override;

  /// \brief Create an OpaqueArray from ArrayData.
  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

  /// \brief The name of the type in the external system.
  ///
  /// The returned view refers to a member of this object.
  std::string_view type_name() const { return type_name_; }

  /// \brief The name of the external system.
  ///
  /// The returned view refers to a member of this object.
  std::string_view vendor_name() const { return vendor_name_; }

 private:
  std::string type_name_;
  std::string vendor_name_;
};
|
||
/// \brief Array class for OpaqueType: a wrapper for (usually binary) data
/// from an external (often non-Arrow) system that could not be interpreted.
///
/// Adds no members of its own; the constructors are inherited from
/// ExtensionArray.
class ARROW_EXPORT OpaqueArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;
};
|
||
/// \brief Return an OpaqueType instance.
///
/// \param[in] storage_type The underlying storage type.
/// \param[in] type_name The name of the type in the external system.
/// \param[in] vendor_name The name of the external system.
ARROW_EXPORT std::shared_ptr<DataType> opaque(std::shared_ptr<DataType> storage_type,
                                              std::string type_name,
                                              std::string vendor_name);
|
||
} // namespace arrow::extension |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is type_fwd included?