From e053387c3af72d0c7330af44e12c02aac8d48896 Mon Sep 17 00:00:00 2001 From: Xu Chen Date: Fri, 29 Mar 2024 11:03:51 +0800 Subject: [PATCH] adjust pom (#462) Signed-off-by: chenxu Co-authored-by: chenxu --- lakesoul-common/pom.xml | 12 ++++++- lakesoul-flink/pom.xml | 12 ++++++- lakesoul-presto/pom.xml | 12 ++++++- lakesoul-spark/pom.xml | 32 ++++++++++++++++++- native-io/lakesoul-io-java/pom.xml | 10 ++++++ pom.xml | 31 ++++++------------ .../01-Getting Started/01-setup-local-env.md | 4 +-- .../01-Getting Started/02-spark-guide.mdx | 4 +-- .../02-Tutorials/02-flink-cdc-sink/index.md | 2 +- .../07-kafka-topics-data-to-lakesoul.md | 4 +-- website/docs/03-Usage Docs/02-setup-spark.md | 12 +++---- .../docs/03-Usage Docs/05-flink-cdc-sync.md | 10 +++--- .../06-flink-lakesoul-connector.md | 4 +-- .../03-Usage Docs/08-auto-compaction-task.md | 2 +- .../03-Usage Docs/09-clean-redundant-data.md | 2 +- website/docs/03-Usage Docs/13-setup-kyuubi.md | 4 +-- .../03-Usage Docs/14-export-to-databases.md | 6 ++-- .../03-Usage Docs/15-spark-gluten/index.md | 2 +- .../01-Getting Started/01-setup-local-env.md | 8 ++--- .../01-Getting Started/02-spark-guide.mdx | 4 +-- .../02-Tutorials/02-flink-cdc-sink/index.md | 4 +-- .../07-kafka-topics-data-to-lakesoul.md | 4 +-- .../current/03-Usage Docs/02-setup-spark.md | 14 ++++---- .../03-Usage Docs/05-flink-cdc-sync.md | 12 +++---- .../06-flink-lakesoul-connector.md | 4 +-- .../03-Usage Docs/08-auto-compaction-task.md | 2 +- .../03-Usage Docs/09-clean-redundant-data.md | 2 +- .../current/03-Usage Docs/13-setup-kyuubi.md | 4 +-- .../03-Usage Docs/14-export-to-databases.md | 6 ++-- .../03-Usage Docs/15-spark-gluten/index.md | 2 +- 30 files changed, 144 insertions(+), 87 deletions(-) diff --git a/lakesoul-common/pom.xml b/lakesoul-common/pom.xml index ce0202164..a2a62f404 100644 --- a/lakesoul-common/pom.xml +++ b/lakesoul-common/pom.xml @@ -15,6 +15,7 @@ SPDX-License-Identifier: Apache-2.0 ${revision} 4.0.0 + LakeSoul Common lakesoul-common ${revision} @@ -176,9 +177,18 @@ SPDX-License-Identifier: Apache-2.0 org.codehaus.mojo flatten-maven-plugin - + 1.6.0 oss + + keep + keep + resolve + remove + remove + remove + remove + diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml index f104239de..b95bac150 100644 --- a/lakesoul-flink/pom.xml +++ b/lakesoul-flink/pom.xml @@ -15,9 +15,10 @@ SPDX-License-Identifier: Apache-2.0 ${revision} 4.0.0 + LakeSoul Flink lakesoul-flink - flink-1.17-${revision} + 1.17-${revision} 1.17.1 2.12 @@ -694,6 +695,15 @@ SPDX-License-Identifier: Apache-2.0 oss + + keep + keep + resolve + remove + remove + remove + remove + diff --git a/lakesoul-presto/pom.xml b/lakesoul-presto/pom.xml index 821c7016d..9f5c9bdd0 100644 --- a/lakesoul-presto/pom.xml +++ b/lakesoul-presto/pom.xml @@ -14,9 +14,10 @@ lakesoul-parent ${revision} + LakeSoul Presto lakesoul-presto - presto-0.28-${revision} + 0.28-${revision} UTF-8 @@ -149,6 +150,15 @@ oss + + keep + keep + resolve + remove + remove + remove + remove + diff --git a/lakesoul-spark/pom.xml b/lakesoul-spark/pom.xml index 8d4beb039..75506cf11 100644 --- a/lakesoul-spark/pom.xml +++ b/lakesoul-spark/pom.xml @@ -15,9 +15,10 @@ SPDX-License-Identifier: Apache-2.0 ${revision} 4.0.0 + LakeSoul Spark lakesoul-spark - spark-3.3-${revision} + 3.3-${revision} 3.2.14 @@ -31,6 +32,26 @@ SPDX-License-Identifier: Apache-2.0 2 + + + central + Maven Central + default + https://repo1.maven.org/maven2 + + true + + + false + + + + confluent + Confluent + https://packages.confluent.io/maven/ + + + @@ -540,6 +561,15 @@ SPDX-License-Identifier: Apache-2.0 oss + + keep + keep + resolve + remove + remove + remove + remove + diff --git a/native-io/lakesoul-io-java/pom.xml b/native-io/lakesoul-io-java/pom.xml index 173e1072b..732ce5c50 100644 --- a/native-io/lakesoul-io-java/pom.xml +++ b/native-io/lakesoul-io-java/pom.xml @@ -20,6 +20,7 @@ SPDX-License-Identifier: Apache-2.0 lakesoul-io-java ${revision} jar + LakeSoul IO Java 8 @@ -463,6 +464,15 @@ SPDX-License-Identifier: Apache-2.0 oss + + keep + keep + resolve + remove + remove + remove + remove + diff --git a/pom.xml b/pom.xml index c7905b9a0..4ccd732f8 100644 --- a/pom.xml +++ b/pom.xml @@ -19,7 +19,7 @@ SPDX-License-Identifier: Apache-2.0 lakesoul-presto pom - LakeSoul POM + LakeSoul A Table Structure Storage to Unify Batch and Streaming Data Processing https://github.com/lakesoul-io/LakeSoul @@ -73,26 +73,6 @@ SPDX-License-Identifier: Apache-2.0 - - - central - Maven Central - default - https://repo1.maven.org/maven2 - - true - - - false - - - - confluent - Confluent - https://packages.confluent.io/maven/ - - - junit @@ -223,9 +203,16 @@ SPDX-License-Identifier: Apache-2.0 org.codehaus.mojo flatten-maven-plugin - + 1.6.0 oss + + keep + keep + keep + resolve + remove + diff --git a/website/docs/01-Getting Started/01-setup-local-env.md b/website/docs/01-Getting Started/01-setup-local-env.md index faf3737d2..acb22cc82 100644 --- a/website/docs/01-Getting Started/01-setup-local-env.md +++ b/website/docs/01-Getting Started/01-setup-local-env.md @@ -91,7 +91,7 @@ spark.sql.catalog.lakesoul | org.apache.spark.sql.lakesoul.catalog.LakeSoulCatal spark.sql.defaultCatalog | lakesoul | set default catalog for spark ### 1.5 Setup Flink environment -Download LakeSoul Flink jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +Download LakeSoul Flink jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar Download Flink: https://dlcdn.apache.org/flink/flink-1.17.2/flink-1.17.2-bin-scala_2.12.tgz @@ -101,7 +101,7 @@ Enter the Flink installation directory and execute the following command: ```shell export lakesoul_home=/opt/soft/pg.property && ./bin/start-cluster.sh -export lakesoul_home=/opt/soft/pg.property && ./bin/sql-client.sh embedded -j lakesoul-flink-flink-1.17-VAR::VERSION.jar +export lakesoul_home=/opt/soft/pg.property && ./bin/sql-client.sh embedded -j lakesoul-flink-1.17-VAR::VERSION.jar ``` #### 1.5.2 Write data to object storage service diff --git a/website/docs/01-Getting Started/02-spark-guide.mdx b/website/docs/01-Getting Started/02-spark-guide.mdx index 7c6131fd5..612d36d23 100644 --- a/website/docs/01-Getting Started/02-spark-guide.mdx +++ b/website/docs/01-Getting Started/02-spark-guide.mdx @@ -62,8 +62,8 @@ Include maven dependencies in your project: ```xml com.dmetasoul - lakesoul - spark-3.3-VAR::VERSION + lakesoul-spark + 3.3-VAR::VERSION ``` diff --git a/website/docs/02-Tutorials/02-flink-cdc-sink/index.md b/website/docs/02-Tutorials/02-flink-cdc-sink/index.md index 17cf58b0d..b61e70168 100644 --- a/website/docs/02-Tutorials/02-flink-cdc-sink/index.md +++ b/website/docs/02-Tutorials/02-flink-cdc-sink/index.md @@ -90,7 +90,7 @@ Submit a LakeSoul Flink CDC Sink job to the Flink cluster started above: ```bash ./bin/flink run -ys 1 -yjm 1G -ytm 2G \ -c org.apache.flink.lakesoul.entry.MysqlCdc\ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.host localhost \ --source_db.port 3306 \ --source_db.db_name test_cdc \ diff --git a/website/docs/02-Tutorials/07-kafka-topics-data-to-lakesoul.md b/website/docs/02-Tutorials/07-kafka-topics-data-to-lakesoul.md index 227691331..131cf2764 100644 --- a/website/docs/02-Tutorials/07-kafka-topics-data-to-lakesoul.md +++ b/website/docs/02-Tutorials/07-kafka-topics-data-to-lakesoul.md @@ -80,7 +80,7 @@ export lakesoul_home=./pg.properties && ./bin/spark-submit \ --driver-memory 4g \ --executor-memory 4g \ --master local[4] \ -./jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar \ +./jars/lakesoul-spark-3.3-VAR::VERSION.jar \ localhost:9092 test.* /tmp/kafka/data /tmp/kafka/checkpoint/ kafka earliest false ``` @@ -157,6 +157,6 @@ export lakesoul_home=./pg.properties && ./bin/spark-submit \ --driver-memory 4g \ --executor-memory 4g \ --master local[4] \ -./jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar \ +./jars/lakesoul-spark-3.3-VAR::VERSION.jar \ localhost:9092 test.* /tmp/kafka/data /tmp/kafka/checkpoint/ kafka earliest false http://localhost:8081 ``` \ No newline at end of file diff --git a/website/docs/03-Usage Docs/02-setup-spark.md b/website/docs/03-Usage Docs/02-setup-spark.md index 30604ccea..db084342a 100644 --- a/website/docs/03-Usage Docs/02-setup-spark.md +++ b/website/docs/03-Usage Docs/02-setup-spark.md @@ -16,14 +16,14 @@ To use `spark-shell`, `pyspark` or `spark-sql` shells, you should include LakeSo #### Use Maven Coordinates via --packages ```bash -spark-shell --packages com.dmetasoul:lakesoul-spark:spark-3.3-VAR::VERSION +spark-shell --packages com.dmetasoul:lakesoul-spark:3.3-VAR::VERSION ``` #### Use Local Packages You can find the LakeSoul packages from our release page: [Releases](https://github.com/lakesoul-io/LakeSoul/releases). Download the jar file and pass it to `spark-submit`. ```bash -spark-submit --jars "lakesoul-spark-spark-3.3-VAR::VERSION.jar" +spark-submit --jars "lakesoul-spark-3.3-VAR::VERSION.jar" ``` Or you could directly put the jar into `$SPARK_HOME/jars` @@ -33,8 +33,8 @@ Include maven dependencies in your project: ```xml com.dmetasoul - lakesoul - spark-3.3-VAR::VERSION + lakesoul-spark + 3.3-VAR::VERSION ``` @@ -144,7 +144,7 @@ taskmanager.memory.task.off-heap.size: 3000m ::: ### Add LakeSoul Jar to Flink's directory -Download LakeSoul Flink Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +Download LakeSoul Flink Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar And put the jar file under `$FLINK_HOME/lib`. After this, you could start flink session cluster or application as usual. @@ -155,6 +155,6 @@ Add the following to your project's pom.xml com.dmetasoul lakesoul - flink-1.17-VAR::VERSION + 1.17-VAR::VERSION ``` \ No newline at end of file diff --git a/website/docs/03-Usage Docs/05-flink-cdc-sync.md b/website/docs/03-Usage Docs/05-flink-cdc-sync.md index 8bf30c7b3..c6be06ae0 100644 --- a/website/docs/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/docs/03-Usage Docs/05-flink-cdc-sync.md @@ -20,7 +20,7 @@ In the Stream API, the main functions of LakeSoul Sink are: ## How to use the command line ### 1. Download LakeSoul Flink Jar -It can be downloaded from the LakeSoul Release page: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar. +It can be downloaded from the LakeSoul Release page: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar. The currently supported Flink version is 1.17. @@ -61,7 +61,7 @@ Description of required parameters: | Parameter | Meaning | Value Description | |----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------| | -c | The task runs the main function entry class | org.apache.flink.lakesoul.entry.MysqlCdc | -| Main package | Task running jar | lakesoul-flink-flink-1.17-VAR::VERSION.jar | +| Main package | Task running jar | lakesoul-flink-1.17-VAR::VERSION.jar | | --source_db.type | source database type | mysql postgres oracle | | --source_db.host | The address of the source database | | | --source_db.port | source database port | | @@ -94,7 +94,7 @@ For Mysql configuration, please refer to https://ververica.github.io/flink-cdc-c ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_name "testDB" \ --source_db.user "root" \ --source.parallelism 1 \ @@ -121,7 +121,7 @@ Synchronous oracle job example For oracle configuration,please refer to https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/oracle-cdc.html ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_type oracle \ --source_db.db_name "testDB" \ --source_db.user "FLINKUSER" \ @@ -153,7 +153,7 @@ For Postgresql, the following additional parameters need to be configured For Postgresql configuration,please refer to https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/postgres-cdc.html ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_name "postgres" \ --source_db.user "postgres" \ --source.parallelism 1 \ diff --git a/website/docs/03-Usage Docs/06-flink-lakesoul-connector.md b/website/docs/03-Usage Docs/06-flink-lakesoul-connector.md index 229bb90c0..9edbcb1a0 100644 --- a/website/docs/03-Usage Docs/06-flink-lakesoul-connector.md +++ b/website/docs/03-Usage Docs/06-flink-lakesoul-connector.md @@ -18,14 +18,14 @@ LakeSoul provides Flink Connector which implements the Dynamic Table interface, To setup Flink environment, please refer to [Setup Spark/Flink Job/Project](../03-Usage%20Docs/02-setup-spark.md) -Introduce LakeSoul dependency: download lakesoul-flink-flink-1.17-VAR::VERSION.jar. +Introduce LakeSoul dependency: download lakesoul-flink-1.17-VAR::VERSION.jar. In order to use Flink to create LakeSoul tables, it is recommended to use Flink SQL Client, which supports direct use of Flink SQL commands to operate LakeSoul tables. In this document, the Flink SQL is to directly enter statements on the Flink SQL Client cli interface; whereas the Table API needs to be used in a Java projects. Switch to the flink folder and execute the command to start the SQLclient client. ```bash # Start Flink SQL Client -bin/sql-client.sh embedded -j lakesoul-flink-flink-1.17-VAR::VERSION.jar +bin/sql-client.sh embedded -j lakesoul-flink-1.17-VAR::VERSION.jar ``` ## 2. DDL diff --git a/website/docs/03-Usage Docs/08-auto-compaction-task.md b/website/docs/03-Usage Docs/08-auto-compaction-task.md index 5b6a2d244..c674d2aed 100644 --- a/website/docs/03-Usage Docs/08-auto-compaction-task.md +++ b/website/docs/03-Usage Docs/08-auto-compaction-task.md @@ -47,7 +47,7 @@ The use the following command to start the compaction service job: --conf "spark.executor.extraJavaOptions=-XX:MaxDirectMemorySize=4G" \ --conf "spark.executor.memoryOverhead=3g" \ --class com.dmetasoul.lakesoul.spark.compaction.CompactionTask \ - jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar + jars/lakesoul-spark-3.3-VAR::VERSION.jar --threadpool.size=10 --database=test ``` diff --git a/website/docs/03-Usage Docs/09-clean-redundant-data.md b/website/docs/03-Usage Docs/09-clean-redundant-data.md index ee569b5f9..5f65e64ac 100644 --- a/website/docs/03-Usage Docs/09-clean-redundant-data.md +++ b/website/docs/03-Usage Docs/09-clean-redundant-data.md @@ -88,7 +88,7 @@ Start the Spark cleanup command locally: --executor-cores 1 \ --num-executors 20 \ --class com.dmetasoul.lakesoul.spark.clean.CleanExpiredData \ - jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar + jars/lakesoul-spark-3.3-VAR::VERSION.jar ``` :::tip diff --git a/website/docs/03-Usage Docs/13-setup-kyuubi.md b/website/docs/03-Usage Docs/13-setup-kyuubi.md index c4ac1906a..f3be1c28f 100644 --- a/website/docs/03-Usage Docs/13-setup-kyuubi.md +++ b/website/docs/03-Usage Docs/13-setup-kyuubi.md @@ -31,7 +31,7 @@ The operating environment is Linux, and Spark, Flink, and Kyuubi have been insta ### 1. Dependencies -Download LakeSoul Flink Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +Download LakeSoul Flink Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar And put the jar file under `$FLINK_HOME/lib`. @@ -74,7 +74,7 @@ More details about Flink SQL with LakeSoul refer to : [Flink Lakesoul Connector] ### 1. Dependencies -Download LakeSoul Spark Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-spark-3.3-VAR::VERSION.jar +Download LakeSoul Spark Jar from: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-3.3-VAR::VERSION.jar And put the jar file under `$SPARK_HOME/jars`. diff --git a/website/docs/03-Usage Docs/14-export-to-databases.md b/website/docs/03-Usage Docs/14-export-to-databases.md index 12daea2c3..573a97463 100644 --- a/website/docs/03-Usage Docs/14-export-to-databases.md +++ b/website/docs/03-Usage Docs/14-export-to-databases.md @@ -34,7 +34,7 @@ Synchronize table to MySQL task ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url jdbc:mysql://172.17.0.4:3306/ \ --target_db.db_type mysql \ --target_db.db_name test \ @@ -50,7 +50,7 @@ Synchronize table to postgresql task ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url jdbc:postgresql://172.17.0.2:5432/ \ --target_db.db_name test \ --target_db.db_type postgres \ @@ -65,7 +65,7 @@ Synchronize table to postgresql task Synchronize table to doris task ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url "jdbc:mysql://172.17.0.2:9030/" \ --source_db.db_name test \ --target_db.db_name test \ diff --git a/website/docs/03-Usage Docs/15-spark-gluten/index.md b/website/docs/03-Usage Docs/15-spark-gluten/index.md index 991977707..630ac834f 100644 --- a/website/docs/03-Usage Docs/15-spark-gluten/index.md +++ b/website/docs/03-Usage Docs/15-spark-gluten/index.md @@ -28,7 +28,7 @@ $SPARK_HOME/bin/spark-shell --master local\[1\] --driver-memory 4g \ --conf spark.sql.catalog.lakesoul=org.apache.spark.sql.lakesoul.catalog.LakeSoulCatalog \ --conf spark.sql.defaultCatalog=lakesoul \ # Introduce the jars of LakeSoul and Gluten - --jars lakesoul-spark-spark-3.3-VAR::VERSION.jar,gluten-velox-bundle-spark3.3_2.12-1.1.0.jar + --jars lakesoul-spark-3.3-VAR::VERSION.jar,gluten-velox-bundle-spark3.3_2.12-1.1.0.jar ``` After starting the Spark task in this way, Gluten and LakeSoul can be enabled at the same time to achieve dual acceleration of IO performance and computing performance. diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/01-setup-local-env.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/01-setup-local-env.md index 9a3eafe0f..0b12a64f5 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/01-setup-local-env.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/01-setup-local-env.md @@ -43,10 +43,10 @@ https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-without-hadoop.tgz LakeSoul 发布 jar 包可以从 GitHub Releases 页面下载:https://github.com/lakesoul-io/LakeSoul/releases 。下载后请将 Jar 包放到 Spark 安装目录下的 jars 目录中: ```bash -wget https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-spark-3.3-VAR::VERSION.jar -P $SPARK_HOME/jars +wget https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-3.3-VAR::VERSION.jar -P $SPARK_HOME/jars ``` -如果访问 Github 有问题,也可以从如下链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-spark-spark-3.3-VAR::VERSION.jar +如果访问 Github 有问题,也可以从如下链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-spark-3.3-VAR::VERSION.jar :::tip 从 2.1.0 版本起,LakeSoul 自身的依赖已经通过 shade 方式打包到一个 jar 包中。之前的版本是多个 jar 包以 tar.gz 压缩包的形式发布。 @@ -93,7 +93,7 @@ spark.sql.catalog.lakesoul | org.apache.spark.sql.lakesoul.catalog.LakeSoulCatal spark.sql.defaultCatalog | lakesoul ### 1.4 Flink 本地环境搭建 -以当前发布最新版本为例,LakeSoul Flink jar 包下载地址为:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +以当前发布最新版本为例,LakeSoul Flink jar 包下载地址为:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar 最新版本支持 flink 集群为1.17,Flink jar下载地址为:https://dlcdn.apache.org/flink/flink-1.17.2/flink-1.17.2-bin-scala_2.12.tgz @@ -105,7 +105,7 @@ spark.sql.defaultCatalog | lakesoul export lakesoul_home=/opt/soft/pg.property && ./bin/start-cluster.sh # 启动 flink sql client -export lakesoul_home=/opt/soft/pg.property && ./bin/sql-client.sh embedded -j lakesoul-flink-flink-1.17-VAR::VERSION.jar +export lakesoul_home=/opt/soft/pg.property && ./bin/sql-client.sh embedded -j lakesoul-flink-1.17-VAR::VERSION.jar ``` #### 1.4.2 将数据写入对象存储服务 diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/02-spark-guide.mdx b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/02-spark-guide.mdx index bff166243..72bfe1838 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/02-spark-guide.mdx +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/01-Getting Started/02-spark-guide.mdx @@ -60,8 +60,8 @@ LakeSoul | Spark Version ```xml com.dmetasoul - lakesoul - spark-3.3-VAR::VERSION + lakesoul-spark + 3.3-VAR::VERSION ``` diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/02-flink-cdc-sink/index.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/02-flink-cdc-sink/index.md index 27201fc9c..8be308c74 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/02-flink-cdc-sink/index.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/02-flink-cdc-sink/index.md @@ -90,7 +90,7 @@ $FLINK_HOME/bin/start-cluster.sh ```bash ./bin/flink run -ys 1 -yjm 1G -ytm 2G \ -c org.apache.flink.lakesoul.entry.MysqlCdc \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.host localhost \ --source_db.port 3306 \ --source_db.db_name test_cdc \ @@ -105,7 +105,7 @@ $FLINK_HOME/bin/start-cluster.sh --server_time_zone UTC ``` -其中 lakesoul-flink 的 jar 包可以从 [Github Release](https://github.com/lakesoul-io/LakeSoul/releases/) 页面下载。如果访问 Github 有问题,也可以通过这个链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-flink-flink-1.17-VAR::VERSION.jar +其中 lakesoul-flink 的 jar 包可以从 [Github Release](https://github.com/lakesoul-io/LakeSoul/releases/) 页面下载。如果访问 Github 有问题,也可以通过这个链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-flink-1.17-VAR::VERSION.jar 在 http://localhost:8081 Flink 作业页面中,点击 Running Job,进入查看 LakeSoul 作业是否已经处于 `Running` 状态。 diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/07-kafka-topics-data-to-lakesoul.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/07-kafka-topics-data-to-lakesoul.md index 9c34dc637..4d8940b27 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/07-kafka-topics-data-to-lakesoul.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/02-Tutorials/07-kafka-topics-data-to-lakesoul.md @@ -78,7 +78,7 @@ export lakesoul_home=./pg.properties && ./bin/spark-submit \ --driver-memory 4g \ --executor-memory 4g \ --master local[4] \ -./jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar \ +./jars/lakesoul-spark-3.3-VAR::VERSION.jar \ localhost:9092 test.* /tmp/kafka/data /tmp/kafka/checkpoint/ kafka earliest false ``` @@ -155,6 +155,6 @@ export lakesoul_home=./pg.properties && ./bin/spark-submit \ --driver-memory 4g \ --executor-memory 4g \ --master local[4] \ -./jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar \ +./jars/lakesoul-spark-3.3-VAR::VERSION.jar \ localhost:9092 test.* /tmp/kafka/data /tmp/kafka/checkpoint/ kafka earliest false http://localhost:8081 ``` \ No newline at end of file diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/02-setup-spark.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/02-setup-spark.md index 9c332739c..51263e457 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/02-setup-spark.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/02-setup-spark.md @@ -15,22 +15,22 @@ LakeSoul 目前支持 Spark 3.3 + Scala 2.12. #### 使用 `--packages` 传 Maven 仓库和包名 ```bash -spark-shell --packages com.dmetasoul:lakesoul-spark:spark-3.3-VAR::VERSION +spark-shell --packages com.dmetasoul:lakesoul-spark:3.3-VAR::VERSION ``` #### 使用打包好的 LakeSoul 包 可以从 [Releases](https://github.com/lakesoul-io/LakeSoul/releases) 页面下载已经打包好的 LakeSoul Jar 包。 下载 jar 并传给 `spark-submit` 命令: ```bash -spark-submit --jars "lakesoul-spark-spark-3.3-VAR::VERSION.jar" +spark-submit --jars "lakesoul-spark-3.3-VAR::VERSION.jar" ``` #### 直接将 Jar 包放在 Spark 环境中 可以将 Jar 包下载后,放在 $SPARK_HOME/jars 中。 -Jar 包可以从 Github Release 页面下载:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-spark-3.3-VAR::VERSION.jar +Jar 包可以从 Github Release 页面下载:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-3.3-VAR::VERSION.jar -或者从国内地址下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-spark-spark-3.3-VAR::VERSION.jar +或者从国内地址下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-spark-3.3-VAR::VERSION.jar ### 设置 Java/Scala 项目 增加以下 Maven 依赖项: @@ -38,7 +38,7 @@ Jar 包可以从 Github Release 页面下载:https://github.com/lakesoul-io/La com.dmetasoul lakesoul-spark - spark-3.3-VAR::VERSION + 3.3-VAR::VERSION ``` @@ -139,7 +139,7 @@ taskmanager.memory.task.off-heap.size: 3000m ### 添加 LakeSoul Jar 到 Flink 部署的目录 -从以下地址下载 LakeSoul Flink Jar:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +从以下地址下载 LakeSoul Flink Jar:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar 并将 jar 文件放在 `$FLINK_HOME/lib` 下。在此之后,您可以像往常一样启动 flink 会话集群或应用程序。 @@ -160,6 +160,6 @@ export HADOOP_CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath` com.dmetasoul lakesoul - flink-1.17-VAR::VERSION + 1.17-VAR::VERSION ``` \ No newline at end of file diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md index ca6549799..a9516d2b3 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md @@ -19,9 +19,9 @@ LakeSoul 自 2.1.0 版本起,实现了 Flink CDC Sink,能够支持 Table API ## 命令行使用方法 ### 1. 下载 LakeSoul Flink Jar -可以在 LakeSoul Release 页面下载:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar。 +可以在 LakeSoul Release 页面下载:https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar。 -如果访问 Github 有问题,也可以通过这个链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-flink-flink-1.17-VAR::VERSION.jar。 +如果访问 Github 有问题,也可以通过这个链接下载:https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/lakesoul-flink-1.17-VAR::VERSION.jar。 目前支持的 Flink 版本为 1.17。 @@ -63,7 +63,7 @@ export LAKESOUL_PG_PASSWORD=root | 参数 | 含义 | 取值说明 | |----------------------|--------------------------------------------------------------------------------------|---------------------------------------------------------------------| | -c | 任务运行main函数入口类 | org.apache.flink.lakesoul.entry.JdbcCDC | -| 主程序包 | 任务运行jar包 | lakesoul-flink-flink-1.17-VAR::VERSION.jar | +| 主程序包 | 任务运行jar包 | lakesoul-flink-1.17-VAR::VERSION.jar | | --source_db.type | 源数据库类型 | mysql postgres oracle | | --source_db.host | 源数据库的地址 | | | --source_db.port | 源数据库的端口 | | @@ -96,7 +96,7 @@ export LAKESOUL_PG_PASSWORD=root 对于Mysql数据库配置,可参考https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/mysql-cdc.html ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_name "testDB" \ --source_db.user "root" \ --source.parallelism 1 \ @@ -124,7 +124,7 @@ export LAKESOUL_PG_PASSWORD=root https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/oracle-cdc.html ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_type oracle \ --source_db.db_name "testDB" \ --source_db.user "FLINKUSER" \ @@ -156,7 +156,7 @@ https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/ 对于Postgres数据库配置,可参考 https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/postgres-cdc.html ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --source_db.db_name "postgres" \ --source_db.user "postgres" \ --source.parallelism 1 \ diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/06-flink-lakesoul-connector.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/06-flink-lakesoul-connector.md index a21c1cdc6..012d8cbd7 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/06-flink-lakesoul-connector.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/06-flink-lakesoul-connector.md @@ -24,14 +24,14 @@ flink.warehouse.dir: "s3://bucket/path" ``` 如果指定了 warehouse 路径,则表路径默认为 `warehouse_dir/table_name`。如果建表时在属性中指定了 `path` 属性,则优先使用该属性作为表的存储路径。 -Flink 引入 LakeSoul 依赖的方法:下載 lakesoul-flink-flink-1.17-VAR::VERSION.jar,放入 `$FLINK_HOME/lib` ,或在启动时指定 jar 的路径。 +Flink 引入 LakeSoul 依赖的方法:下載 lakesoul-flink-1.17-VAR::VERSION.jar,放入 `$FLINK_HOME/lib` ,或在启动时指定 jar 的路径。 为了使用 Flink 创建 LakeSoul 表,推荐使用 Flink SQL Client,支持直接使用 Flink SQL 命令操作 LakeSoul 表,本文档中 Flink SQL 是在 Flink SQL Client 界面直接输入语句;Table API 需要在 Java 项目中编写使用。 切换到 Flink 文件夹下,执行命令开启 SQL Client 客户端。 ```bash # 启动 Flink SQL Client -bin/sql-client.sh embedded -j lakesoul-flink-flink-1.17-VAR::VERSION.jar +bin/sql-client.sh embedded -j lakesoul-flink-1.17-VAR::VERSION.jar ``` ## 2. DDL diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/08-auto-compaction-task.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/08-auto-compaction-task.md index c3f4f506e..e9983b774 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/08-auto-compaction-task.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/08-auto-compaction-task.md @@ -46,7 +46,7 @@ trigger 和 pg 函数在数据库初始化的时候已经配置,默认压缩 --conf "spark.executor.extraJavaOptions=-XX:MaxDirectMemorySize=4G" \ --conf "spark.executor.memoryOverhead=3g" \ --class com.dmetasoul.lakesoul.spark.compaction.CompactionTask \ - jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar + jars/lakesoul-spark-3.3-VAR::VERSION.jar --threadpool.size=10 --database=test ``` diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/09-clean-redundant-data.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/09-clean-redundant-data.md index 1ca0182c3..8e0861302 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/09-clean-redundant-data.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/09-clean-redundant-data.md @@ -88,7 +88,7 @@ LakeSoul 提供了一个清理过期数据的 Spark 作业实现,会扫描元 --executor-cores 1 \ --num-executors 20 \ --class com.dmetasoul.lakesoul.spark.clean.CleanExpiredData \ - jars/lakesoul-spark-spark-3.3-VAR::VERSION.jar + jars/lakesoul-spark-3.3-VAR::VERSION.jar ``` :::tip diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/13-setup-kyuubi.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/13-setup-kyuubi.md index 06f5331ee..f4cd11278 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/13-setup-kyuubi.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/13-setup-kyuubi.md @@ -32,7 +32,7 @@ LakeSoul实现了Flink/Spark Connector。我们可以通过Kyuubi使用Spark/Fli ### 1. 依赖 -下载LakeSoul Flink Jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-flink-1.17-VAR::VERSION.jar +下载LakeSoul Flink Jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-flink-1.17-VAR::VERSION.jar 将该jar拷贝至 `$FLINK_HOME/lib`. @@ -76,7 +76,7 @@ drop table `lakesoul`.`default`.test_lakesoul_table_v1; ### 1. 依赖 -下载LakeSoul Spark Jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-spark-3.3-VAR::VERSION.jar +下载LakeSoul Spark Jar: https://github.com/lakesoul-io/LakeSoul/releases/download/vVAR::VERSION/lakesoul-spark-3.3-VAR::VERSION.jar 将该jar拷贝至 `$SPARK_HOME/jars`. diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md index bc760a77d..fd6333bac 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md @@ -34,7 +34,7 @@ LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出 ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url jdbc:mysql://172.17.0.4:3306/ \ --target_db.db_type mysql \ --target_db.db_name test \ @@ -49,7 +49,7 @@ LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出 出湖postgres任务启动 ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ - lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url jdbc:postgresql://172.17.0.2:5432/ \ --target_db.db_name test \ --target_db.db_type postgres \ @@ -64,7 +64,7 @@ LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出 出湖到doris任务启动 ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ -lakesoul-flink-flink-1.17-VAR::VERSION.jar \ +lakesoul-flink-1.17-VAR::VERSION.jar \ --target_db.url "jdbc:mysql://172.17.0.2:9030/" \ --source_db.db_name test \ --target_db.db_name test \ diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/15-spark-gluten/index.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/15-spark-gluten/index.md index 458845d73..b27c8632a 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/15-spark-gluten/index.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/15-spark-gluten/index.md @@ -27,7 +27,7 @@ $SPARK_HOME/bin/spark-shell --master local\[1\] --driver-memory 4g \ --conf spark.sql.catalog.lakesoul=org.apache.spark.sql.lakesoul.catalog.LakeSoulCatalog \ --conf spark.sql.defaultCatalog=lakesoul \ # 引入 LakeSoul、Gluten 的 jar - --jars lakesoul-spark-spark-3.3-VAR::VERSION.jar,gluten-velox-bundle-spark3.3_2.12-1.1.0.jar + --jars lakesoul-spark-3.3-VAR::VERSION.jar,gluten-velox-bundle-spark3.3_2.12-1.1.0.jar ``` 以这样的方式启动 Spark 任务后,即可同时启用 Gluten 和 LakeSoul,实现 IO 性能、计算性能的双重加速。