Skip to content

Commit

Permalink
GH-45304: [C++][S3] Workaround compatibility issue between AWS SDK an…
Browse files Browse the repository at this point in the history
…d MinIO (#45310)

### Rationale for this change

Some AWS SDK versions have faulty chunked encoding when the body is 0 bytes:
aws/aws-sdk-cpp#3259

### What changes are included in this PR?

Work around faulty chunked encoding implementation by only setting a body stream if non-empty.

### Are these changes tested?

Locally for now, but will be picked by CI (and conda-forge) at some point.

### Are there any user-facing changes?

No.

* GitHub Issue: #45304

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
pitrou authored Jan 22, 2025
1 parent 3a3f2ec commit 12f6265
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
29 changes: 17 additions & 12 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1983,27 +1983,33 @@ class ObjectOutputStream final : public io::OutputStream {
const void* data, int64_t nbytes, std::shared_ptr<Buffer> owned_buffer = nullptr) {
req.SetBucket(ToAwsString(path_.bucket));
req.SetKey(ToAwsString(path_.key));
req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
req.SetContentLength(nbytes);
RETURN_NOT_OK(SetSSECustomerKey(&req, sse_customer_key_));

if (!background_writes_) {
req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
// GH-45304: avoid setting a body stream if length is 0.
// This workaround can be removed once we require AWS SDK 1.11.489 or later.
if (nbytes != 0) {
req.SetBody(std::make_shared<StringViewStream>(data, nbytes));
}

ARROW_ASSIGN_OR_RAISE(auto outcome, TriggerUploadRequest(req, holder_));

RETURN_NOT_OK(sync_result_callback(req, upload_state_, part_number_, outcome));
} else {
// If the data isn't owned, make an immutable copy for the lifetime of the closure
if (owned_buffer == nullptr) {
ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, io_context_.pool()));
memcpy(owned_buffer->mutable_data(), data, nbytes);
} else {
DCHECK_EQ(data, owned_buffer->data());
DCHECK_EQ(nbytes, owned_buffer->size());
// (GH-45304: avoid setting a body stream if length is 0, see above)
if (nbytes != 0) {
// If the data isn't owned, make an immutable copy for the lifetime of the closure
if (owned_buffer == nullptr) {
ARROW_ASSIGN_OR_RAISE(owned_buffer, AllocateBuffer(nbytes, io_context_.pool()));
memcpy(owned_buffer->mutable_data(), data, nbytes);
} else {
DCHECK_EQ(data, owned_buffer->data());
DCHECK_EQ(nbytes, owned_buffer->size());
}
req.SetBody(std::make_shared<StringViewStream>(owned_buffer->data(),
owned_buffer->size()));
}
req.SetBody(
std::make_shared<StringViewStream>(owned_buffer->data(), owned_buffer->size()));

{
std::unique_lock<std::mutex> lock(upload_state_->mutex);
Expand Down Expand Up @@ -2345,7 +2351,6 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
req.SetBucket(ToAwsString(bucket));
req.SetKey(ToAwsString(key));
req.SetContentType(kAwsDirectoryContentType);
req.SetBody(std::make_shared<std::stringstream>(""));
return OutcomeToStatus(
std::forward_as_tuple("When creating key '", key, "' in bucket '", bucket, "': "),
"PutObject", client_lock.Move()->PutObject(req));
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/filesystem/s3fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,6 @@ class TestS3FS : public S3TestMixin {
Aws::S3::Model::PutObjectRequest req;
req.SetBucket(ToAwsString("bucket"));
req.SetKey(ToAwsString("emptydir/"));
req.SetBody(std::make_shared<std::stringstream>(""));
RETURN_NOT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
// NOTE: no need to create intermediate "directories" somedir/ and
// somedir/subdir/
Expand Down

0 comments on commit 12f6265

Please sign in to comment.