-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[improvement] support two phases commit in structured streaming (#156)
- Loading branch information
Showing
7 changed files
with
262 additions
and
113 deletions.
There are no files selected for viewing
83 changes: 0 additions & 83 deletions
83
...s-connector/src/main/scala/org/apache/doris/spark/listener/DorisTransactionListener.scala
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
spark-doris-connector/src/main/scala/org/apache/doris/spark/txn/TransactionHandler.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package org.apache.doris.spark.txn | ||
|
||
import org.apache.doris.spark.cfg.{ConfigurationOptions, SparkSettings} | ||
import org.apache.doris.spark.load.{CachedDorisStreamLoadClient, DorisStreamLoad} | ||
import org.apache.doris.spark.sql.Utils | ||
import org.apache.spark.internal.Logging | ||
|
||
import java.time.Duration | ||
import scala.collection.mutable | ||
import scala.util.{Failure, Success} | ||
|
||
/**
 * Stream load transaction handler.
 *
 * Commits or aborts stream load transactions through the [[DorisStreamLoad]] client
 * obtained from [[CachedDorisStreamLoadClient]]. Every single commit/abort call is
 * wrapped in `Utils.retry`, configured with the retry count and interval read from
 * the job settings.
 *
 * @param settings job settings
 */
class TransactionHandler(settings: SparkSettings) extends Logging {

  private val sinkTxnIntervalMs: Int = settings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_TXN_INTERVAL_MS,
    ConfigurationOptions.DORIS_SINK_TXN_INTERVAL_MS_DEFAULT)
  private val sinkTxnRetries: Integer = settings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_TXN_RETRIES,
    ConfigurationOptions.DORIS_SINK_TXN_RETRIES_DEFAULT)
  private val dorisStreamLoad: DorisStreamLoad = CachedDorisStreamLoadClient.getOrCreate(settings)

  /**
   * Commit transactions.
   *
   * Every transaction in the list is attempted, even if an earlier one fails.
   * If at least one commit fails, the ids of all failed transactions are logged
   * and the cause of the last failure is rethrown.
   *
   * @param txnIds transaction id list
   */
  def commitTransactions(txnIds: List[Long]): Unit = {
    log.debug(s"start to commit transactions, count ${txnIds.size}")
    // Attempt every commit first; keep only the (txnId, cause) pairs of the failed ones.
    val failures: List[(Long, Throwable)] = txnIds.flatMap(commitTransaction)
    failures.lastOption.foreach { case (_, lastCause) =>
      log.error("uncommitted txn ids: {}", failures.map(_._1).mkString("[", ",", "]"))
      throw lastCause
    }
  }

  /**
   * Commit a single transaction, retrying according to the sink settings.
   *
   * @param txnId transaction id
   * @return `None` when the commit succeeded, otherwise the transaction id paired
   *         with the failure cause
   */
  private def commitTransaction(txnId: Long): Option[(Long, Throwable)] = {
    Utils.retry(sinkTxnRetries, Duration.ofMillis(sinkTxnIntervalMs), log) {
      dorisStreamLoad.commit(txnId)
    }() match {
      case Success(_) => None
      // Match any Throwable: a Failure holding a non-Exception cause must not become a MatchError.
      case Failure(cause) => Some(txnId -> cause)
    }
  }

  /**
   * Abort transactions.
   *
   * Every transaction in the list is attempted. Abort failures are logged
   * (each cause individually, then the list of affected ids) and are not rethrown.
   *
   * @param txnIds transaction id list
   */
  def abortTransactions(txnIds: List[Long]): Unit = {
    log.debug(s"start to abort transactions, count ${txnIds.size}")
    val failedTxnIds: List[Long] = txnIds.flatMap { txnId =>
      Utils.retry(sinkTxnRetries, Duration.ofMillis(sinkTxnIntervalMs), log) {
        dorisStreamLoad.abortById(txnId)
      }() match {
        case Success(_) => None
        case Failure(cause) =>
          // Keep the cause visible in the log instead of silently dropping it.
          log.warn(s"failed to abort txn $txnId", cause)
          Some(txnId)
      }
    }
    if (failedTxnIds.nonEmpty) {
      log.error("not aborted txn ids: {}", failedTxnIds.mkString("[", ",", "]"))
    }
  }

}
|
||
/** Companion object exposing a factory for [[TransactionHandler]]. */
object TransactionHandler {

  /**
   * Create a transaction handler for the given job settings.
   *
   * @param settings job settings
   * @return a new [[TransactionHandler]] instance
   */
  def apply(settings: SparkSettings): TransactionHandler = {
    new TransactionHandler(settings)
  }

}
66 changes: 66 additions & 0 deletions
66
...nnector/src/main/scala/org/apache/doris/spark/txn/listener/DorisTransactionListener.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package org.apache.doris.spark.txn.listener | ||
|
||
import org.apache.doris.spark.cfg.SparkSettings | ||
import org.apache.doris.spark.txn.TransactionHandler | ||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.scheduler._ | ||
import org.apache.spark.util.CollectionAccumulator | ||
|
||
import scala.collection.JavaConverters._ | ||
import scala.collection.mutable | ||
|
||
/**
 * Spark listener that finishes the transactions collected in the accumulator
 * whenever a job ends: they are committed if the job succeeded and aborted otherwise.
 * The accumulator is reset after each commit/abort attempt, whether or not it threw.
 *
 * @param preCommittedTxnAcc accumulator whose values are passed to the handler as transaction ids
 * @param settings           job settings used to build the [[TransactionHandler]]
 */
class DorisTransactionListener(preCommittedTxnAcc: CollectionAccumulator[Long], settings: SparkSettings)
  extends SparkListener with Logging {

  val txnHandler: TransactionHandler = TransactionHandler(settings)

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    val pendingTxnIds: mutable.Buffer[Long] = preCommittedTxnAcc.value.asScala
    jobEnd.jobResult match {
      // job succeeded but nothing was pre-committed: nothing to do
      case JobSucceeded if pendingTxnIds.isEmpty =>
        log.debug("job run succeed, but there is no pre-committed txn ids")
      // job succeeded: commit every pre-committed transaction
      case JobSucceeded =>
        log.info("job run succeed, start committing transactions")
        try {
          txnHandler.commitTransactions(pendingTxnIds.toList)
        } finally {
          preCommittedTxnAcc.reset()
        }
        log.info("commit transaction success")
      // job failed but nothing was pre-committed: nothing to do
      case _ if pendingTxnIds.isEmpty =>
        log.debug("job run failed, but there is no pre-committed txn ids")
      // job failed: abort every pre-committed transaction
      case _ =>
        log.info("job run failed, start aborting transactions")
        try {
          txnHandler.abortTransactions(pendingTxnIds.toList)
        } finally {
          preCommittedTxnAcc.reset()
        }
        log.info("abort transaction success")
    }
  }

}
69 changes: 69 additions & 0 deletions
69
...r/src/main/scala/org/apache/doris/spark/txn/listener/DorisTxnStreamingQueryListener.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
package org.apache.doris.spark.txn.listener | ||
|
||
import org.apache.doris.spark.cfg.SparkSettings | ||
import org.apache.doris.spark.txn.TransactionHandler | ||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.streaming.StreamingQueryListener | ||
import org.apache.spark.util.CollectionAccumulator | ||
|
||
import scala.collection.JavaConverters._ | ||
import scala.collection.mutable | ||
|
||
/**
 * Streaming query listener that commits the transactions collected in the accumulator
 * on every progress event and aborts them when the query terminates with an exception.
 * The accumulator is reset after each commit/abort attempt, whether or not it threw.
 *
 * @param preCommittedTxnAcc accumulator whose values are passed to the handler as transaction ids
 * @param settings           job settings used to build the [[TransactionHandler]]
 */
class DorisTxnStreamingQueryListener(preCommittedTxnAcc: CollectionAccumulator[Long], settings: SparkSettings)
  extends StreamingQueryListener with Logging {

  private val txnHandler = TransactionHandler(settings)

  override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {}

  override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = {
    // commit whatever has been pre-committed when a progress event arrives
    val pendingTxnIds: mutable.Buffer[Long] = preCommittedTxnAcc.value.asScala
    if (pendingTxnIds.isEmpty) {
      log.warn("job run succeed, but there is no pre-committed txn ids")
    } else {
      log.info(s"batch[${event.progress.batchId}] run succeed, start committing transactions")
      try {
        txnHandler.commitTransactions(pendingTxnIds.toList)
      } finally {
        preCommittedTxnAcc.reset()
      }
      log.info(s"batch[${event.progress.batchId}] commit transaction success")
    }
  }

  override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {
    val pendingTxnIds: mutable.Buffer[Long] = preCommittedTxnAcc.value.asScala
    // only a query that terminated with an exception triggers an abort
    if (event.exception.isDefined) {
      if (pendingTxnIds.isEmpty) {
        log.warn("job run failed, but there is no pre-committed txn ids")
      } else {
        log.info("job run failed, start aborting transactions")
        try {
          txnHandler.abortTransactions(pendingTxnIds.toList)
        } finally {
          preCommittedTxnAcc.reset()
        }
        log.info("abort transaction success")
      }
    }
  }

}
Oops, something went wrong.