Commit e50534d
zenghua committed Feb 4, 2024
Signed-off-by: zenghua <[email protected]>
1 parent 4e4b415
Showing 16 changed files with 613 additions and 13 deletions.
@@ -0,0 +1,136 @@
# SPDX-FileCopyrightText: 2023 LakeSoul Contributors
#
# SPDX-License-Identifier: Apache-2.0

on:
  push:
    paths:
      - "rust/**"
    branches:
      - 'main'
  pull_request:
    paths:
      - "rust/**"
    branches:
      - 'main'
      - 'release/**'
  workflow_dispatch:

name: Consistency CI

env:
  RUSTFLAGS: "-Awarnings"

permissions:
  actions: write
jobs:
  verify-hash-consistency:
    runs-on: ubuntu-latest
    services:
      # Label used to access the service container
      postgres:
        # Docker Hub image
        image: postgres:14.5
        # Provide the password for postgres
        env:
          POSTGRES_PASSWORD: lakesoul_test
          POSTGRES_USER: lakesoul_test
          POSTGRES_DB: lakesoul_test
        # Set health checks to wait until postgres has started
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
          --name lakesoul-test-pg
        ports:
          # Maps tcp port 5432 on service container to the host
          - 5432:5432
    steps:
      - uses: actions/checkout@v4
      - name: Install requirement
        uses: ConorMacBride/install-package@v1
        with:
          apt: postgresql-client-14 cargo
      - name: Init PG
        run: |
          ./script/meta_init_for_local_test.sh -j 2
      - name: Set up JDK 8
        uses: actions/setup-java@v4
        with:
          java-version: '8'
          distribution: 'temurin'
          cache: maven
      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install pymysql cryptography jproperties --no-cache-dir
      - name: Install Protoc
        uses: arduino/setup-protoc@v2
        with:
          version: "23.x"
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: vemonet/setup-spark@v1
        with:
          spark-version: '3.3.1'
          hadoop-version: '3'
      - run: spark-submit --version
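      # Pin the Rust nightly toolchain and build the native libraries with
      # cross; the release cdylibs produced here are consumed by the JVM
      # modules in the Maven build below.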
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: nightly-2023-05-20
          default: true
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: "./rust -> target"
      - uses: actions-rs/cargo@v1
        with:
          use-cross: true
          command: build
          args: '--manifest-path rust/Cargo.toml --target x86_64-unknown-linux-gnu --release --all-features'
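      # Stage the cross-built .so files where the Maven cross-build profile
      # expects them (rust/target/release) before packaging the Spark and
      # Flink jars.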
      - name: Build with Maven
        run: |
          mkdir -p rust/target/release
          cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_io_c.so rust/target/release
          cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_metadata_c.so rust/target/release
          MAVEN_OPTS="-Xmx4000m" mvn -q -B package -f pom.xml -Pcross-build -DskipTests
      - name: Get jar names
        run: |
          echo "FLINK_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink)" >> $GITHUB_ENV
          echo "FLINK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV
          echo "SPARK_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark)" >> $GITHUB_ENV
          echo "SPARK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV
      - name: Copy built jar to work-dir
        run: |
          cp ./lakesoul-flink/target/$FLINK_JAR_NAME ./script/ci/work-dir
          cp ./lakesoul-flink/target/$FLINK_TEST_JAR_NAME ./script/ci/work-dir
          cp ./lakesoul-spark/target/$SPARK_JAR_NAME ./script/ci/work-dir
          cp ./lakesoul-spark/target/$SPARK_TEST_JAR_NAME ./script/ci/work-dir
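      # Scale factor 0.001 keeps the generated TPC-H dataset tiny, so data
      # generation and the consistency checks below stay fast in CI.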
      - name: Generate benchmark data and expected query results
        run: |
          mkdir -p lakesoul/test_files/tpch/data
          git clone https://github.com/databricks/tpch-dbgen.git
          cd tpch-dbgen
          make
          ./dbgen -f -s 0.001
          mv *.tbl ../lakesoul/test_files/tpch/data
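      # The Rust side (lakesoul-datafusion) ingests the generated tables; the
      # psql queries then print LakeSoul's metadata tables (table_info,
      # data_commit_info) to the CI log for inspection.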
      - name: Verify that benchmark queries return expected results
        run: |
          export TPCH_DATA=`realpath lakesoul/test_files/tpch/data`
          cd ./rust
          # use release build for plan verification because debug build causes stack overflow
          cargo test load_tpch_data --package lakesoul-datafusion --features=ci -- --nocapture
          PGPASSWORD='lakesoul_test' psql -U lakesoul_test -h 127.0.0.1 -p 5432 -d lakesoul_test -c "select * from table_info;"
          PGPASSWORD='lakesoul_test' psql -U lakesoul_test -h 127.0.0.1 -p 5432 -d lakesoul_test -c "select * from data_commit_info;"
      - name: Hash-Consistency Verification task
        run: |
          export TPCH_DATA=`realpath lakesoul/test_files/tpch/data`
          export lakesoul_home=`realpath script/ci/work-dir/lakesoul.properties`
          spark-submit --driver-memory 4G --executor-memory 4G --conf spark.driver.memoryOverhead=1500m --conf spark.executor.memoryOverhead=1500m --jars script/ci/work-dir/$SPARK_JAR_NAME --class org.apache.spark.sql.lakesoul.benchmark.ConsistencyCI --master local[4] script/ci/work-dir/$SPARK_TEST_JAR_NAME
164 changes: 164 additions & 0 deletions
lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/ConsistencyCI.scala
@@ -0,0 +1,164 @@
package org.apache.spark.sql.lakesoul.benchmark

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.lakesoul.catalog.LakeSoulCatalog
import org.apache.spark.sql.types._

object ConsistencyCI {

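  // Each entry pairs a TPC-H table name with its Spark schema and the
  // hash-partition columns used when the table is written to LakeSoul.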
  val tpchTable = Seq(
    ("customer",
      StructType(Array(
        StructField("c_custkey", LongType, nullable = false),
        StructField("c_name", StringType, nullable = false),
        StructField("c_address", StringType, nullable = false),
        StructField("c_nationkey", LongType, nullable = false),
        StructField("c_phone", StringType, nullable = false),
        StructField("c_acctbal", DecimalType(15, 2), nullable = false),
        StructField("c_mktsegment", StringType, nullable = false),
        StructField("c_comment", StringType, nullable = false),
      )),
      "c_custkey, c_name"),
    ("part",
      StructType(Array(
        StructField("p_partkey", LongType, nullable = false),
        StructField("p_name", StringType, nullable = false),
        StructField("p_mfgr", StringType, nullable = false),
        StructField("p_brand", StringType, nullable = false),
        StructField("p_type", StringType, nullable = false),
        StructField("p_size", IntegerType, nullable = false),
        StructField("p_container", StringType, nullable = false),
        StructField("p_retailprice", DecimalType(15, 2), nullable = false),
        StructField("p_comment", StringType, nullable = false),
      )),
      "p_partkey, p_name"),
    ("supplier",
      StructType(Array(
        StructField("s_suppkey", LongType, nullable = false),
        StructField("s_name", StringType, nullable = false),
        StructField("s_address", StringType, nullable = false),
        StructField("s_nationkey", LongType, nullable = false),
        StructField("s_phone", StringType, nullable = false),
        StructField("s_acctbal", DecimalType(15, 2), nullable = false),
        StructField("s_comment", StringType, nullable = false),
      )),
      "s_suppkey, s_name"),
    ("partsupp",
      StructType(Array(
        StructField("ps_partkey", LongType, nullable = false),
        StructField("ps_suppkey", LongType, nullable = false),
        StructField("ps_availqty", IntegerType, nullable = false),
        StructField("ps_supplycost", DecimalType(15, 2), nullable = false),
        StructField("ps_comment", StringType, nullable = false),
      )),
      "ps_partkey, ps_suppkey"),
    ("orders",
      StructType(Array(
        StructField("o_orderkey", LongType, nullable = false),
        StructField("o_custkey", LongType, nullable = false),
        StructField("o_orderstatus", StringType, nullable = false),
        StructField("o_totalprice", DecimalType(15, 2), nullable = false),
        StructField("o_orderdate", DateType, nullable = false),
        StructField("o_orderpriority", StringType, nullable = false),
        StructField("o_clerk", StringType, nullable = false),
        StructField("o_shippriority", IntegerType, nullable = false),
        StructField("o_comment", StringType, nullable = false),
      )),
      "o_orderkey, o_custkey"),
    ("nation",
      StructType(Array(
        StructField("n_nationkey", LongType, nullable = false),
        StructField("n_name", StringType, nullable = false),
        StructField("n_regionkey", LongType, nullable = false),
        StructField("n_comment", StringType, nullable = false),
      )),
      "n_nationkey, n_name"),
    ("region",
      StructType(Array(
        StructField("r_regionkey", LongType, nullable = false),
        StructField("r_name", StringType, nullable = false),
        StructField("r_comment", StringType, nullable = false),
      )),
      "r_regionkey, r_name"),
    ("lineitem",
      StructType(Array(
        StructField("l_orderkey", LongType, nullable = false),
        StructField("l_partkey", LongType, nullable = false),
        StructField("l_suppkey", LongType, nullable = false),
        StructField("l_linenumber", IntegerType, nullable = false),
        StructField("l_quantity", DecimalType(15, 2), nullable = false),
        StructField("l_extendedprice", DecimalType(15, 2), nullable = false),
        StructField("l_discount", DecimalType(15, 2), nullable = false),
        StructField("l_tax", DecimalType(15, 2), nullable = false),
        StructField("l_returnflag", StringType, nullable = false),
        StructField("l_linestatus", StringType, nullable = false),
        StructField("l_shipdate", DateType, nullable = false),
        StructField("l_commitdate", DateType, nullable = false),
        StructField("l_receiptdate", DateType, nullable = false),
        StructField("l_shipinstruct", StringType, nullable = false),
        StructField("l_shipmode", StringType, nullable = false),
        StructField("l_comment", StringType, nullable = false),
      )),
      "l_orderkey, l_partkey"),
  )

  def load_data(spark: SparkSession): Unit = {
    val tpchPath = System.getenv("TPCH_DATA")
    val lakeSoulPath = "/tmp/lakesoul/tpch"
    tpchTable.foreach(tup => {
      val (name, schema, hashPartitions) = tup
      val df = spark.read.option("delimiter", "|")
        .schema(schema)
        .csv(s"$tpchPath/$name.tbl")
      // df.show
      df.write.format("lakesoul")
        .option("shortTableName", name)
        .option("hashPartitions", hashPartitions)
        .option("hashBucketNum", 5)
        .mode("Overwrite")
        .save(s"$lakeSoulPath/$name")
    })
  }

  def main(args: Array[String]): Unit = {
    val builder = SparkSession.builder()
      .appName("Consistency CI")
      .master("local[4]")
      .config("spark.sql.extensions", "com.dmetasoul.lakesoul.sql.LakeSoulSparkSessionExtension")
      .config("spark.sql.catalog.lakesoul", classOf[LakeSoulCatalog].getName)
      .config(SQLConf.DEFAULT_CATALOG.key, LakeSoulCatalog.CATALOG_NAME)
      .config("spark.default.parallelism", "16")
      .config("spark.sql.parquet.binaryAsString", "true")

    val spark = builder.getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    spark.sql("show tables")
    spark.sql("CREATE NAMESPACE IF NOT EXISTS tpch")
    spark.sql("USE tpch")

    // load_data(spark)

    tpchTable.foreach(tup => {
      // val sparkDF = spark.sql(s"select * from ${tup._1}")
      val rustDF = spark.sql(s"select * from default.${tup._1}")
      rustDF.show()
      // val diff1 = sparkDF.rdd.subtract(rustDF.rdd)
      // val diff2 = rustDF.rdd.subtract(sparkDF.rdd)
      // val result = diff1.count() == 0 && diff2.count() == 0
      // if (!result) {
      //   println("sparkDF: ")
      //   println(sparkDF.collectAsList())
      //   println("rustDF: ")
      //   println(rustDF.collectAsList())
      //   System.exit(1)
      // }
    })
  }
}
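The commented-out block in main sketches the intended row-level comparison between the Spark-written and Rust-written tables. A minimal standalone version of that check, assuming two DataFrames with identical schemas (the helper name framesMatch is hypothetical), could look like:

import org.apache.spark.sql.DataFrame

// Hypothetical helper mirroring the commented-out check above: two tables
// are consistent when neither DataFrame contains rows the other lacks.
def framesMatch(sparkDF: DataFrame, rustDF: DataFrame): Boolean = {
  val onlyInSpark = sparkDF.rdd.subtract(rustDF.rdd)
  val onlyInRust = rustDF.rdd.subtract(sparkDF.rdd)
  onlyInSpark.count() == 0 && onlyInRust.count() == 0
}

Comparing via RDD subtract sidesteps row ordering, at the cost of two full shuffles; for the tiny scale factor used here that is cheap.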
@@ -0,0 +1,5 @@
// SPDX-FileCopyrightText: 2023 LakeSoul Contributors
//
// SPDX-License-Identifier: Apache-2.0

pub mod tpch;
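// The tpch module presumably hosts the TPC-H loading tests (such as
// load_tpch_data) that the Consistency CI workflow drives via `cargo test`.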