Skip to content

Add LSH implementation for vector embedding indexing #1106

Add LSH implementation for vector embedding indexing

Add LSH implementation for vector embedding indexing #1106

Workflow file for this run

# SPDX-FileCopyrightText: 2023 LakeSoul Contributors
#
# SPDX-License-Identifier: Apache-2.0
# This workflow will build a Java project with Maven, and cache/restore any dependencies to improve the workflow execution time
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
# Display name of the workflow.
name: CI with Maven Test

# Triggers:
#   * push to main
#   * pull request targeting main or any release/** branch
#   * manual dispatch
# A push or pull request whose changes touch only the ignored paths below
# does not start a run.
on:
  push:
    paths-ignore:
      - 'javadoc/**'
      - 'website/**'
      - 'cpp/**'
      - 'python/**'
      - '**.md'
    branches:
      - 'main'
  pull_request:
    paths-ignore:
      - 'javadoc/**'
      - 'website/**'
      - 'cpp/**'
      - 'python/**'
      - '**.md'
    branches:
      - 'main'
      - 'release/**'
  workflow_dispatch:

jobs:
# Builds the Rust workspace under ./rust for x86_64-unknown-linux-gnu in
# release mode and uploads the two resulting shared libraries as artifacts.
# The test jobs in this workflow download these artifacts by name.
build-rust-linux-x86_64:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    - uses: actions-rs/toolchain@v1
      with:
        profile: minimal
        toolchain: stable
        default: true
    # Cache cargo build output for the workspace rooted at ./rust.
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: './rust -> target'
    # The action reference was garbled into "[email protected]" in this copy
    # of the file. NOTE(review): the version tag 0.3.7 is reconstructed;
    # confirm it against the repository history before merging.
    - name: Cache Docker images
      uses: ScribeMD/docker-cache@0.3.7
      with:
        # Cache key changes whenever rust/Cross.toml changes.
        key: docker-${{ runner.os }}-${{ hashFiles('rust/Cross.toml') }}
    # use-cross makes the action run the build through `cross` instead of
    # plain cargo.
    - uses: actions-rs/cargo@v1
      with:
        use-cross: true
        command: build
        args: '--manifest-path rust/Cargo.toml --target x86_64-unknown-linux-gnu --release --all-features'
    - uses: actions/upload-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_io_c.so
    - uses: actions/upload-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_metadata_c.so
# Runs an explicit list of lakesoul-spark test suites (see -Dtest below)
# against a PostgreSQL service container, using the native libraries
# produced by build-rust-linux-x86_64.
spark-test-1:
  runs-on: ubuntu-latest
  needs: [build-rust-linux-x86_64]
  services:
    # Label used to access the service container
    postgres:
      # Docker Hub image
      image: postgres:14.5
      # User, password and database passed to the postgres image
      env:
        POSTGRES_PASSWORD: lakesoul_test
        POSTGRES_USER: lakesoul_test
        POSTGRES_DB: lakesoul_test
      # Set health checks to wait until postgres has started; the container
      # is given the fixed name lakesoul-test-pg.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
        --name lakesoul-test-pg
      ports:
        # Maps tcp port 5432 on service container to the host
        - '5432:5432'
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    # Refresh the package index first: a stale index on the runner image can
    # make the install fail.
    - name: Install psql
      run: |
        sudo apt-get update
        sudo apt-get install -y postgresql-client-14
    - name: Init PG
      run: |
        ./script/meta_init_for_local_test.sh -j 2
    - name: Install Protoc
      uses: arduino/setup-protoc@v2
      with:
        version: '23.x'
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    # Both native libraries are placed in ./rust/target/release/.
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    # -am also builds the modules lakesoul-spark depends on;
    # failIfNoSpecifiedTests=false keeps modules with no test matching
    # -Dtest from failing the build.
    - name: Build with Maven
      run: |
        mvn -B test -pl lakesoul-spark -am -Pcross-build -Pparallel-test --file pom.xml -Dtest='UpdateScalaSuite,AlterTableByNameSuite,ReadSuite,UpdateSQLSuite,ParquetNativeFilterSuite,DeleteScalaSuite,DeleteSQLSuite,ParquetV2FilterSuite,ParquetScanSuite,UpsertSuiteBase' -Dsurefire.failIfNoSpecifiedTests=false
    # always(): produce and upload the report even when the tests failed.
    - name: Generate Report Site
      if: always()
      run: |
        mvn surefire-report:report-only -pl lakesoul-spark -am
    # continue-on-error: a failed upload does not fail the job.
    - name: Upload Test Report
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: maven-test-report-artifact-spark-1
        path: lakesoul-spark/target/site
        retention-days: 5
        if-no-files-found: error
# Runs the lakesoul-spark tests that are NOT in the exclusion list passed to
# -Dtest below, against a PostgreSQL service container, using the native
# libraries produced by build-rust-linux-x86_64. The excluded suites are the
# ones named explicitly by the spark-test-1 and spark-test-rbac jobs.
spark-test-2:
  runs-on: ubuntu-latest
  needs: [build-rust-linux-x86_64]
  services:
    # Label used to access the service container
    postgres:
      # Docker Hub image
      image: postgres:14.5
      # User, password and database passed to the postgres image
      env:
        POSTGRES_PASSWORD: lakesoul_test
        POSTGRES_USER: lakesoul_test
        POSTGRES_DB: lakesoul_test
      # Set health checks to wait until postgres has started; the container
      # is given the fixed name lakesoul-test-pg.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
        --name lakesoul-test-pg
      ports:
        # Maps tcp port 5432 on service container to the host
        - '5432:5432'
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    # Refresh the package index first: a stale index on the runner image can
    # make the install fail.
    - name: Install psql
      run: |
        sudo apt-get update
        sudo apt-get install -y postgresql-client-14
    - name: Init PG
      run: |
        ./script/meta_init_for_local_test.sh -j 2
    - name: Install Protoc
      uses: arduino/setup-protoc@v2
      with:
        version: '23.x'
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    # Both native libraries are placed in ./rust/target/release/.
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    # Each '!Name' entry excludes that suite. -am also builds the modules
    # lakesoul-spark depends on; failIfNoSpecifiedTests=false keeps modules
    # with no test matching -Dtest from failing the build.
    - name: Build with Maven
      run: |
        mvn -B test -pl lakesoul-spark -am -Pcross-build -Pparallel-test --file pom.xml -Dtest='!UpdateScalaSuite,!AlterTableByNameSuite,!ReadSuite,!UpdateSQLSuite,!ParquetNativeFilterSuite,!DeleteScalaSuite,!DeleteSQLSuite,!ParquetV2FilterSuite,!ParquetScanSuite,!UpsertSuiteBase,!RBACOperationSuite' -Dsurefire.failIfNoSpecifiedTests=false
    # always(): produce and upload the report even when the tests failed.
    - name: Generate Report Site
      if: always()
      run: |
        mvn surefire-report:report-only -pl lakesoul-spark -am
    # continue-on-error: a failed upload does not fail the job.
    - name: Upload Test Report
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: maven-test-report-artifact-spark-2
        path: lakesoul-spark/target/site
        retention-days: 5
        if-no-files-found: error
# Runs only RBACOperationSuite from lakesoul-spark. In addition to the
# PostgreSQL service container it initialises RBAC metadata and sets up a
# local HDFS with a static user-to-group mapping.
spark-test-rbac:
  runs-on: ubuntu-latest
  needs: [build-rust-linux-x86_64]
  services:
    # Label used to access the service container
    postgres:
      # Docker Hub image
      image: postgres:14.5
      # User, password and database passed to the postgres image
      env:
        POSTGRES_PASSWORD: lakesoul_test
        POSTGRES_USER: lakesoul_test
        POSTGRES_DB: lakesoul_test
      # Set health checks to wait until postgres has started; the container
      # is given the fixed name lakesoul-test-pg.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
        --name lakesoul-test-pg
      ports:
        # Maps tcp port 5432 on service container to the host
        - '5432:5432'
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    # Refresh the package index first: a stale index on the runner image can
    # make the install fail.
    - name: Install psql
      run: |
        sudo apt-get update
        sudo apt-get install -y postgresql-client-14
    - name: Init PG
      run: |
        ./script/meta_init_for_local_test.sh -j 1
    - name: Init PG RBAC
      run: |
        ./script/meta_rbac_init_for_local_test.sh -j 1
    - name: Install Protoc
      uses: arduino/setup-protoc@v2
      with:
        version: '23.x'
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    # NOTE(review): @master follows a mutable branch; consider pinning this
    # action to a release tag or commit SHA.
    - uses: beyondstorage/setup-hdfs@master
      with:
        hdfs-version: '3.3.6'
    # Inserts a hadoop.user.group.static.mapping.overrides property before
    # the closing </configuration> tag of core-site.xml, restarts DFS so the
    # change is picked up, then opens up permissions on the HDFS root.
    - name: Modify HDFS User Group Mapping
      run: |
        sed -i '/^<\/configuration>/i <property><name>hadoop.user.group.static.mapping.overrides</name><value>admin1=domain1;user1=domain1;user2=domain1;admin2=domain2</value></property>' $HADOOP_HOME/etc/hadoop/core-site.xml
        $HADOOP_HOME/sbin/stop-dfs.sh
        $HADOOP_HOME/sbin/start-dfs.sh
        $HADOOP_HOME/bin/hadoop fs -chmod -R 777 /
    # Both native libraries are placed in ./rust/target/release/.
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    # -am also builds the modules lakesoul-spark depends on;
    # failIfNoSpecifiedTests=false keeps modules with no test matching
    # -Dtest from failing the build.
    - name: Build with Maven
      run: |
        mvn -B test -pl lakesoul-spark -am -Pcross-build --file pom.xml -Dtest='RBACOperationSuite' -Dsurefire.failIfNoSpecifiedTests=false
    # always(): produce and upload the report even when the tests failed.
    - name: Generate Report Site
      if: always()
      run: |
        mvn surefire-report:report-only -pl lakesoul-spark -am
    # continue-on-error: a failed upload does not fail the job.
    - name: Upload Test Report
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: maven-test-report-artifact-spark-3
        path: lakesoul-spark/target/site
        retention-days: 5
        if-no-files-found: error
# Runs the lakesoul-flink tests except LakeSoulRBACTest against a PostgreSQL
# service container, using the native libraries produced by
# build-rust-linux-x86_64.
flink-test-1:
  runs-on: ubuntu-latest
  needs: [build-rust-linux-x86_64]
  services:
    # Label used to access the service container
    postgres:
      # Docker Hub image
      image: postgres:14.5
      # User, password and database passed to the postgres image
      env:
        POSTGRES_PASSWORD: lakesoul_test
        POSTGRES_USER: lakesoul_test
        POSTGRES_DB: lakesoul_test
      # Set health checks to wait until postgres has started; the container
      # is given the fixed name lakesoul-test-pg.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
        --name lakesoul-test-pg
      ports:
        # Maps tcp port 5432 on service container to the host
        - '5432:5432'
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    # Refresh the package index first: a stale index on the runner image can
    # make the install fail.
    - name: Install psql
      run: |
        sudo apt-get update
        sudo apt-get install -y postgresql-client-14
    # Loads script/meta_init.sql into the lakesoul_test database through the
    # port published by the service container.
    - name: Init PG
      run: |
        PGPASSWORD=lakesoul_test psql -h localhost -p 5432 -U lakesoul_test -f script/meta_init.sql lakesoul_test
    - name: Install Protoc
      uses: arduino/setup-protoc@v2
      with:
        version: '23.x'
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    # Both native libraries are placed in ./rust/target/release/.
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    # MAVEN_OPTS raises the Maven JVM heap limit to 5 GB. '!LakeSoulRBACTest'
    # excludes that test class. failIfNoSpecifiedTests=false keeps modules
    # with no test matching -Dtest from failing the build.
    - name: Build with Maven
      run: |
        MAVEN_OPTS="-Xmx5g" mvn -B clean test -pl lakesoul-flink -am -Pcross-build --file pom.xml -Dtest='!LakeSoulRBACTest' -Dsurefire.failIfNoSpecifiedTests=false
    # always(): produce and upload the report even when the tests failed.
    - name: Generate Report Site
      if: always()
      run: |
        mvn surefire-report:report-only -pl lakesoul-flink -am
    # continue-on-error: a failed upload does not fail the job.
    - name: Upload Test Report
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: maven-test-report-artifact-flink-1
        path: lakesoul-flink/target/site
        retention-days: 5
        if-no-files-found: error
# Runs only LakeSoulRBACTest from lakesoul-flink. In addition to the
# PostgreSQL service container it loads the RBAC SQL scripts and sets up a
# local HDFS with a static user-to-group mapping.
flink-test-rbac:
  runs-on: ubuntu-latest
  needs: [build-rust-linux-x86_64]
  services:
    # Label used to access the service container
    postgres:
      # Docker Hub image
      image: postgres:14.5
      # User, password and database passed to the postgres image
      env:
        POSTGRES_PASSWORD: lakesoul_test
        POSTGRES_USER: lakesoul_test
        POSTGRES_DB: lakesoul_test
      # Set health checks to wait until postgres has started; the container
      # is given the fixed name lakesoul-test-pg.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
        --name lakesoul-test-pg
      ports:
        # Maps tcp port 5432 on service container to the host
        - '5432:5432'
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK 11
      uses: actions/setup-java@v4
      with:
        java-version: '11'
        distribution: 'temurin'
        cache: maven
    # Refresh the package index first: a stale index on the runner image can
    # make the install fail.
    - name: Install psql
      run: |
        sudo apt-get update
        sudo apt-get install -y postgresql-client-14
    # The four Init steps load their SQL scripts into the lakesoul_test
    # database in this order: base metadata, RBAC row policy, test domains,
    # test users.
    - name: Init PG
      run: |
        PGPASSWORD=lakesoul_test psql -h localhost -p 5432 -U lakesoul_test -f script/meta_init.sql lakesoul_test
    - name: Init PG RBAC ROW POLICY
      run: |
        PGPASSWORD=lakesoul_test psql -h localhost -p 5432 -U lakesoul_test -f script/meta_rbac_init.sql lakesoul_test
    - name: Init PG RBAC DOMAINS
      run: |
        PGPASSWORD=lakesoul_test psql -h localhost -p 5432 -U lakesoul_test -f script/meta_rbac_init_domains_for_test.sql lakesoul_test
    - name: Init PG RBAC USERS
      run: |
        PGPASSWORD=lakesoul_test psql -h localhost -p 5432 -U lakesoul_test -f script/meta_rbac_init_users_for_test.sql lakesoul_test
    - name: Install Protoc
      uses: arduino/setup-protoc@v2
      with:
        version: '23.x'
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    # NOTE(review): @master follows a mutable branch; consider pinning this
    # action to a release tag or commit SHA.
    - uses: beyondstorage/setup-hdfs@master
      with:
        hdfs-version: '3.3.6'
    # Inserts a hadoop.user.group.static.mapping.overrides property before
    # the closing </configuration> tag of core-site.xml, restarts DFS so the
    # change is picked up, then opens up permissions on the HDFS root.
    - name: Modify HDFS User Group Mapping
      run: |
        sed -i '/^<\/configuration>/i <property><name>hadoop.user.group.static.mapping.overrides</name><value>admin1=domain1;user1=domain1;user2=domain1;admin2=domain2</value></property>' $HADOOP_HOME/etc/hadoop/core-site.xml
        $HADOOP_HOME/sbin/stop-dfs.sh
        $HADOOP_HOME/sbin/start-dfs.sh
        $HADOOP_HOME/bin/hadoop fs -chmod -R 777 /
    # Both native libraries are placed in ./rust/target/release/.
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    - uses: actions/download-artifact@v4
      with:
        name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
        path: ./rust/target/release/
    # MAVEN_OPTS raises the Maven JVM heap limit to 5 GB.
    # failIfNoSpecifiedTests=false keeps modules with no test matching
    # -Dtest from failing the build.
    - name: Build with Maven
      run: |
        MAVEN_OPTS="-Xmx5g" mvn -B clean test -pl lakesoul-flink -am -Pcross-build --file pom.xml -Dtest='LakeSoulRBACTest' -Dsurefire.failIfNoSpecifiedTests=false
    # always(): produce and upload the report even when the tests failed.
    - name: Generate Report Site
      if: always()
      run: |
        mvn surefire-report:report-only -pl lakesoul-flink -am
    # continue-on-error: a failed upload does not fail the job.
    - name: Upload Test Report
      if: always()
      continue-on-error: true
      uses: actions/upload-artifact@v4
      with:
        name: maven-test-report-artifact-flink-2
        path: lakesoul-flink/target/site
        retention-days: 5
        if-no-files-found: error