Skip to content

Commit

Permalink
Review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Sep 2, 2024
1 parent d028339 commit 69273f4
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 51 deletions.
4 changes: 0 additions & 4 deletions cpp/src/arrow/extension/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ class ARROW_EXPORT JsonExtensionType : public ExtensionType {

std::string extension_name() const override { return "arrow.json"; }

std::string ToString(bool show_metadata = false) const override {
return "extension<json>";
};

bool ExtensionEquals(const ExtensionType& other) const override;

Result<std::shared_ptr<DataType>> Deserialize(
Expand Down
58 changes: 38 additions & 20 deletions cpp/src/arrow/extension/json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,6 @@ std::shared_ptr<Array> ExampleJson(const std::shared_ptr<DataType>& storage_type
return ExtensionType::WrapArray(arrow::extension::json(storage_type), arr);
}

static std::shared_ptr<Array> ExampleJsonInvalidUTF8(
const std::shared_ptr<DataType>& storage_type) {
return ArrayFromJSON(storage_type,
"["
R"(
"Hi",
"olá mundo",
"你好世界",
"",
)"
"\"\xa0\xa1\""
"]");
}

TEST_F(TestJsonExtensionType, JsonRoundtrip) {
for (const auto& storage_type : {utf8(), large_utf8(), utf8_view()}) {
std::shared_ptr<Array> ext_arr = ExampleJson(storage_type);
Expand All @@ -77,14 +63,46 @@ TEST_F(TestJsonExtensionType, JsonRoundtrip) {

TEST_F(TestJsonExtensionType, InvalidUTF8) {
for (const auto& storage_type : {utf8(), large_utf8(), utf8_view()}) {
std::shared_ptr<Array> ext_arr = ExampleJsonInvalidUTF8(storage_type);
auto batch =
RecordBatch::Make(schema({field("f0", json(storage_type))}), 8, {ext_arr});
auto json_type = json(storage_type);
auto invalid_input = ArrayFromJSON(storage_type, "[\"Ⱥa\xFF\", \"\xe1\xbdⱤaA\"]");
auto ext_arr =
ExtensionType::WrapArray(arrow::extension::json(storage_type), invalid_input);

ASSERT_RAISES_WITH_MESSAGE(Invalid,
"Invalid: Invalid UTF8 sequence at string index 0",
ext_arr->ValidateFull());
ASSERT_RAISES_WITH_MESSAGE(Invalid,
"Invalid: Invalid UTF8 sequence at string index 0",
arrow::internal::ValidateUTF8(*ext_arr));

auto batch = RecordBatch::Make(schema({field("f0", json_type)}), 2, {ext_arr});
std::shared_ptr<RecordBatch> read_batch;
ASSERT_RAISES_WITH_MESSAGE(IOError,
"IOError: Array length did not match record batch length",
RoundtripBatch(batch, &read_batch));
ASSERT_OK(RoundtripBatch(batch, &read_batch));
}
}

class UTF8ExtensionArrayTest : public ::testing::Test {
public:
static std::shared_ptr<Array> ExampleJson(
const std::shared_ptr<DataType>& storage_type) {
std::shared_ptr<Array> arr = ArrayFromJSON(storage_type, R"([
"null",
"1234",
"3.14159",
"true",
"false",
"\"a json string\"",
"[\"a\", \"json\", \"array\"]",
"{\"obj\": \"a simple json object\"}"
])");
return ExtensionType::WrapArray(arrow::extension::json(storage_type), arr);
}
};

TEST_F(UTF8ExtensionArrayTest, JSONExtensionType) {
for (const auto& storage_type : {utf8(), large_utf8(), utf8_view()}) {
const auto ext_arr = ExampleJson(storage_type);
ASSERT_OK(arrow::internal::ValidateUTF8(*ext_arr));
}
}

Expand Down
27 changes: 0 additions & 27 deletions cpp/src/arrow/util/utf8_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

#include <gtest/gtest.h>

#include "arrow/array/validate.h"
#include "arrow/extension/json.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/string.h"
#include "arrow/util/utf8_internal.h"
Expand Down Expand Up @@ -564,30 +562,5 @@ TEST(UTF8Length, Basics) {
ASSERT_EQ(length("\xf0\x9f\x99\x8c"), 1);
}

class UTF8ExtensionArrayTest : public ::testing::Test {
public:
static std::shared_ptr<Array> ExampleJson(
const std::shared_ptr<DataType>& storage_type) {
std::shared_ptr<Array> arr = ArrayFromJSON(storage_type, R"([
"null",
"1234",
"3.14159",
"true",
"false",
"\"a json string\"",
"[\"a\", \"json\", \"array\"]",
"{\"obj\": \"a simple json object\"}"
])");
return ExtensionType::WrapArray(arrow::extension::json(storage_type), arr);
}
};

TEST_F(UTF8ExtensionArrayTest, JSONExtensionType) {
for (const auto& storage_type : {utf8(), large_utf8(), utf8_view()}) {
const auto ext_arr = ExampleJson(storage_type);
ASSERT_OK(arrow::internal::ValidateUTF8(*ext_arr));
}
}

} // namespace util
} // namespace arrow

0 comments on commit 69273f4

Please sign in to comment.