Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

regenerate arrow-ipc/src/gen with patched flatbuffers #6426

Merged
merged 12 commits into from
Jan 8, 2025
3 changes: 1 addition & 2 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ jobs:
ARROW_INTEGRATION_JAVA: ON
ARROW_INTEGRATION_JS: ON
ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust"
# Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052
Copy link
Contributor

@alamb alamb Sep 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing this line and having the test run successfully is quite compelling. Nice work @bkietz

Screenshot 2024-09-26 at 4 06 35 PM

ARCHERY_INTEGRATION_WITH_NANOARROW: "0"
ARCHERY_INTEGRATION_WITH_NANOARROW: "1"
# https://github.com/apache/arrow/pull/38403/files#r1371281630
ARCHERY_INTEGRATION_WITH_RUST: "1"
# These are necessary because the github runner overrides $HOME
Expand Down
14 changes: 7 additions & 7 deletions arrow-flight/src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1708,7 +1708,7 @@ mod tests {
])
.unwrap();

verify_encoded_split(batch, 112).await;
verify_encoded_split(batch, 120).await;
}

#[tokio::test]
Expand All @@ -1719,7 +1719,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 4304).await;
verify_encoded_split(batch, 4312).await;
}

#[tokio::test]
Expand Down Expand Up @@ -1755,7 +1755,7 @@ mod tests {
// 5k over limit (which is 2x larger than limit of 5k)
// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 5800).await;
verify_encoded_split(batch, 5808).await;
}

#[tokio::test]
Expand All @@ -1771,7 +1771,7 @@ mod tests {

let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();

verify_encoded_split(batch, 48).await;
verify_encoded_split(batch, 56).await;
}

#[tokio::test]
Expand All @@ -1785,7 +1785,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 3328).await;
verify_encoded_split(batch, 3336).await;
}

#[tokio::test]
Expand All @@ -1799,7 +1799,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 5280).await;
verify_encoded_split(batch, 5288).await;
}

#[tokio::test]
Expand All @@ -1824,7 +1824,7 @@ mod tests {

// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
verify_encoded_split(batch, 4128).await;
verify_encoded_split(batch, 4136).await;
}

/// Return size, in memory of flight data
Expand Down
2 changes: 1 addition & 1 deletion arrow-ipc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-data = { workspace = true }
arrow-schema = { workspace = true }
flatbuffers = { version = "24.3.25", default-features = false }
flatbuffers = { version = "24.12.23", default-features = false }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉

lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true }
zstd = { version = "0.13.0", default-features = false, optional = true }

Expand Down
90 changes: 47 additions & 43 deletions arrow-ipc/regen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Change to the toplevel `arrow-rs` directory
pushd $DIR/../

echo "Build flatc from source ..."

FB_URL="https://github.com/google/flatbuffers"
FB_DIR="arrow/.flatbuffers"
FLATC="$FB_DIR/bazel-bin/flatc"

if [ -z $(which bazel) ]; then
echo "bazel is required to build flatc"
exit 1
fi

echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"

if [ ! -e $FB_DIR ]; then
echo "git clone $FB_URL ..."
git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
if [ -z "$FLATC" ]; then
echo "Build flatc from source ..."

FB_URL="https://github.com/google/flatbuffers"
FB_DIR="arrow/.flatbuffers"
FLATC="$FB_DIR/bazel-bin/flatc"

if [ -z $(which bazel) ]; then
echo "bazel is required to build flatc"
exit 1
fi

echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')"

if [ ! -e $FB_DIR ]; then
echo "git clone $FB_URL ..."
git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR
else
echo "git pull $FB_URL ..."
git -C $FB_DIR pull
fi

pushd $FB_DIR
echo "run: bazel build :flatc ..."
bazel build :flatc
popd
else
echo "git pull $FB_URL ..."
git -C $FB_DIR pull
echo "Using flatc $FLATC ..."
fi

pushd $FB_DIR
echo "run: bazel build :flatc ..."
bazel build :flatc
popd


# Execute the code generation:
$FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs

Expand Down Expand Up @@ -99,37 +102,38 @@ for f in `ls *.rs`; do
fi

echo "Modifying: $f"
sed -i '' '/extern crate flatbuffers;/d' $f
sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f
sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f
sed -i '' '/pub mod org {/d' $f
sed -i '' '/pub mod apache {/d' $f
sed -i '' '/pub mod arrow {/d' $f
sed -i '' '/pub mod flatbuf {/d' $f
sed -i '' '/} \/\/ pub mod flatbuf/d' $f
sed -i '' '/} \/\/ pub mod arrow/d' $f
sed -i '' '/} \/\/ pub mod apache/d' $f
sed -i '' '/} \/\/ pub mod org/d' $f
sed -i '' '/use core::mem;/d' $f
sed -i '' '/use core::cmp::Ordering;/d' $f
sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f
sed --in-place='' '/extern crate flatbuffers;/d' $f
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why but bash/sed produced an error for me until I made this substitution

sed: can't read /extern crate flatbuffers;/d: No such file or directory

Reading the man page, it seems like the expected invocation for the short flag is -ibackup, equivalent to --inplace=backup (or -i, equivalent to --inplace='')

       -i[SUFFIX], --in-place[=SUFFIX]

              edit files in place (makes backup if SUFFIX supplied)

Should I replace this with

Suggested change
sed --in-place='' '/extern crate flatbuffers;/d' $f
sed -i '/extern crate flatbuffers;/d' $f

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC Mac's sed (BSD utils) and the Linux coreutils sed behave differently. It's a bit of a pain.

sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f
sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f
sed --in-place='' '/pub mod org {/d' $f
sed --in-place='' '/pub mod apache {/d' $f
sed --in-place='' '/pub mod arrow {/d' $f
sed --in-place='' '/pub mod flatbuf {/d' $f
sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f
sed --in-place='' '/} \/\/ pub mod arrow/d' $f
sed --in-place='' '/} \/\/ pub mod apache/d' $f
sed --in-place='' '/} \/\/ pub mod org/d' $f
sed --in-place='' '/use core::mem;/d' $f
sed --in-place='' '/use core::cmp::Ordering;/d' $f
sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f

# required by flatc 1.12.0+
sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
for name in ${names[@]}; do
sed -i '' "/use crate::${name}::\*;/d" $f
sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
sed --in-place='' "/use crate::${name}::\*;/d" $f
sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
done

# Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_".
sed -i '' 's/type__/type_/g' $f
sed -i '' 's/TYPE__/TYPE_/g' $f
sed --in-place='' 's/type__/type_/g' $f
sed --in-place='' 's/TYPE__/TYPE_/g' $f

# Some files need prefixes
if [[ $f == "File.rs" ]]; then
# Now prefix the file with the static contents
echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f
elif [[ $f == "Message.rs" ]]; then
sed --in-place='' 's/List<Int16>/\`List<Int16>\`/g' $f
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was another manual edit of the generated files. Moving it here should simplify the next time regen.sh is used. Are there any other manual edits which I haven't noticed?

echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
elif [[ $f == "SparseTensor.rs" ]]; then
echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f
Expand Down
26 changes: 16 additions & 10 deletions arrow-ipc/src/gen/File.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ use flatbuffers::EndianScalar;
use std::{cmp::Ordering, mem};
// automatically generated by the FlatBuffers compiler, do not modify

// @generated

// struct Block, aligned to 8
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block {
let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size());
dst.copy_from_slice(src);
}
#[inline]
fn alignment() -> flatbuffers::PushAlignment {
flatbuffers::PushAlignment::new(8)
}
}

impl<'a> flatbuffers::Verifiable for Block {
Expand Down Expand Up @@ -211,8 +217,8 @@ impl<'a> Footer<'a> {
Footer { _tab: table }
}
#[allow(unused_mut)]
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>,
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>,
args: &'args FooterArgs<'args>,
) -> flatbuffers::WIPOffset<Footer<'bldr>> {
let mut builder = FooterBuilder::new(_fbb);
Expand Down Expand Up @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> {
}
}

pub struct FooterBuilder<'a: 'b, 'b> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
}
impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> {
#[inline]
pub fn add_version(&mut self, version: MetadataVersion) {
self.fbb_
Expand Down Expand Up @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> {
);
}
#[inline]
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> {
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> {
let start = _fbb.start_table();
FooterBuilder {
fbb_: _fbb,
Expand Down Expand Up @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer {
flatbuffers::size_prefixed_root_unchecked::<Footer>(buf)
}
#[inline]
pub fn finish_footer_buffer<'a, 'b>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub fn finish_footer_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
root: flatbuffers::WIPOffset<Footer<'a>>,
) {
fbb.finish(root, None);
}

#[inline]
pub fn finish_size_prefixed_footer_buffer<'a, 'b>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
pub fn finish_size_prefixed_footer_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
root: flatbuffers::WIPOffset<Footer<'a>>,
) {
fbb.finish_size_prefixed(root, None);
Expand Down
Loading
Loading