From 38731b3361f0ffa26423bae2455ba89cd810b164 Mon Sep 17 00:00:00 2001 From: sbeesla-gds Date: Mon, 29 Apr 2024 14:33:06 +0100 Subject: [PATCH 1/3] added redshift processing step function --- iac/main/resources/state-machine.yml | 27 ++++++ ...er_consolidated_schema_processing.asl.json | 12 +++ ...ft_consolidated_schema_processing.asl.json | 88 +++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 statemachine/txma_redshift_consolidated_schema_processing.asl.json diff --git a/iac/main/resources/state-machine.yml b/iac/main/resources/state-machine.yml index 509d2a576..e47d7295e 100644 --- a/iac/main/resources/state-machine.yml +++ b/iac/main/resources/state-machine.yml @@ -704,6 +704,7 @@ StepFunctionRedshiftProcessRole: - Effect: Allow Resource: - !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:${Environment}-dap-redshift-processing' + - !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:${Environment}-dap-consolidated-stage-layer-to-redshift-processing' Action: - states:ListExecutions @@ -741,6 +742,7 @@ TxmaRawLayerConsolidatedSchemaProcessingStateMachine: GlueCrawlerRawName: !Ref RawLayerSingleTableCrawler RawToStageProcessingGlueJobname: !Ref RawStageTransformProcessPythonGlueJob DataQualityGlueJobname: !Ref DataQualityStageLayerOptimisedPythonGlueJob + RedshiftProcessingStepFunctionArn: !Ref RedshiftProcessingStateMachine Events: DailySchedule: Type: Schedule # More info about Schedule Event Source: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-property-statemachine-schedule.html @@ -882,3 +884,28 @@ DataQualityStageLayerOptimisedPythonGlueJob: SecurityConfiguration: !Ref GlueSecurityConfig Name: !Sub ${Environment}-dap-data-quality-new-stage-metrics-generation Role: !Ref GlueScriptsExecutionRole + +RedshiftConsolidatedModelProcessingLogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: /aws/stepfunction/dap-consolidated-stage-layer-to-redshift + KmsKeyId: !GetAtt KmsKey.Arn + RetentionInDays: 30 + +RedshiftConsolidatedModelProcessingStateMachine: + Type: AWS::Serverless::StateMachine # More info about State Machine Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-statemachine.html + Properties: + DefinitionUri: statemachine/txma_redshift_consolidated_schema_processing.asl + Name: !Sub ${Environment}-dap-consolidated-stage-layer-to-redshift-processing + Role: !GetAtt StepFunctionRedshiftProcessRole.Arn + Logging: + Destinations: + - CloudWatchLogsLogGroup: + LogGroupArn: !GetAtt RedshiftConsolidatedModelProcessingLogGroup.Arn + IncludeExecutionData: true + Level: ALL + DefinitionSubstitutions: + S3MetaDataBucketName: !Ref ELTMetadataBucket + RedshiftWorkgroup: !Ref RedshiftServerlessWorkgroup + RedshiftDatabaseName: dap_txma_reporting_db_refactored + StateMachineResultsBucket: !Ref StateMachineResultsBucket \ No newline at end of file diff --git a/statemachine/txma_raw_layer_consolidated_schema_processing.asl.json b/statemachine/txma_raw_layer_consolidated_schema_processing.asl.json index 755ede874..94e9cd59c 100644 --- a/statemachine/txma_raw_layer_consolidated_schema_processing.asl.json +++ b/statemachine/txma_raw_layer_consolidated_schema_processing.asl.json @@ -98,6 +98,18 @@ "Parameters": { "JobName": "${DataQualityGlueJobname}" }, + "Next": "RedshiftProcessingStepFunction" + }, + "RedshiftProcessingStepFunction": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Parameters": { + "StateMachineArn": "${RedshiftProcessingStepFunctionArn}", + "Input": { + "StatePayload": "Triggered from raw to stage consolidated step function", + "AWS_STEP_FUNCTIONS_STARTED_BY_EXECUTION_ID.$": "$$.Execution.Id" + } + }, "Next": "StopProcessing" }, "StopProcessing": { diff --git a/statemachine/txma_redshift_consolidated_schema_processing.asl.json b/statemachine/txma_redshift_consolidated_schema_processing.asl.json new file mode 100644 index 000000000..72d086208 --- /dev/null +++ b/statemachine/txma_redshift_consolidated_schema_processing.asl.json @@ -0,0 +1,88 @@ +{ + "Comment": "Redshit ELT processing workflow for loading dimension and fact tables", + "StartAt": "ListExecutions", + "States": { + "ListExecutions": { + "Type": "Task", + "Next": "ValidateRunningInstances", + "Parameters": { + "StateMachineArn.$": "$$.StateMachine.Id", + "StatusFilter": "RUNNING" + }, + "Resource": "arn:aws:states:::aws-sdk:sfn:listExecutions", + "ResultSelector": { + "runningExecutionsCount.$": "States.ArrayLength($.Executions)" + } + }, + "ValidateRunningInstances": { + "Type": "Choice", + "Choices": [ + { + "Variable": "$.runningExecutionsCount", + "NumericGreaterThan": 1, + "Next": "RunningInstanceDetected" + } + ], + "Default": "dap_datamart_update" + }, + "RunningInstanceDetected": { + "Type": "Fail", + "Error": "RunningInstanceDetected" + }, + "dap_datamart_update": { + "Comment": "Invoke Redshift Dap DataMart update script", + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:redshiftdata:executeStatement", + "ResultPath": "$.sql_output", + "Parameters": { + "WorkgroupName": "dev-redshift-serverless-workgroup", + "Database": "dap_txma_reporting_db_refactored", + "Sql": "call conformed_refactored.update_dap_data_mart()" + }, + "Next": "wait_on_dap_datamart_update" + }, + "wait_on_dap_datamart_update": { + "Comment": "Wait before status check", + "Type": "Wait", + "Seconds": 5, + "Next": "dap_datamart_update_status_check" + }, + "dap_datamart_update_status_check": { + "Comment": "Check Task Status", + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:redshiftdata:describeStatement", + "ResultPath": "$.sql_output", + "Parameters": { + "Id.$": "$.sql_output.Id" + }, + "Next": "is_dap_datamart_update_complete" + }, + "is_dap_datamart_update_complete": { + "Comment": "check if DAP Datamart update step is complete", + "Type": "Choice", + "Choices": [ + { + "Variable": "$.sql_output.Status", + "StringEquals": "FAILED", + "Next": "dap_datamart_update_failure" + }, + { + "Variable": "$.sql_output.Status", + "StringEquals": "FINISHED", + "Next": "StopProcessing" + } + ], + "Default": "wait_on_dap_datamart_update" + }, + "dap_datamart_update_failure": { + "Type": "Fail", + "Cause": "Failure within DAP Datamart update_step", + "Error": "Error" + }, + "StopProcessing": { + "Type": "Pass", + "Result": "pass", + "End": true + } + } + } \ No newline at end of file From 1da22b776e12507a036038e1b14a20e6b3273725 Mon Sep 17 00:00:00 2001 From: sbeesla-gds Date: Mon, 29 Apr 2024 14:34:27 +0100 Subject: [PATCH 2/3] resolved format isues --- iac/main/resources/state-machine.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iac/main/resources/state-machine.yml b/iac/main/resources/state-machine.yml index e47d7295e..5f2dd8ef3 100644 --- a/iac/main/resources/state-machine.yml +++ b/iac/main/resources/state-machine.yml @@ -908,4 +908,4 @@ RedshiftConsolidatedModelProcessingStateMachine: S3MetaDataBucketName: !Ref ELTMetadataBucket RedshiftWorkgroup: !Ref RedshiftServerlessWorkgroup RedshiftDatabaseName: dap_txma_reporting_db_refactored - StateMachineResultsBucket: !Ref StateMachineResultsBucket \ No newline at end of file + StateMachineResultsBucket: !Ref StateMachineResultsBucket From d051a52efe5270aaf9a80888107dde09e8282523 Mon Sep 17 00:00:00 2001 From: sbeesla-gds Date: Mon, 29 Apr 2024 14:40:33 +0100 Subject: [PATCH 3/3] resolved format isues --- iac/main/resources/state-machine.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iac/main/resources/state-machine.yml b/iac/main/resources/state-machine.yml index 5f2dd8ef3..a243dfd94 100644 --- a/iac/main/resources/state-machine.yml +++ b/iac/main/resources/state-machine.yml @@ -895,7 +895,7 @@ RedshiftConsolidatedModelProcessingLogGroup: RedshiftConsolidatedModelProcessingStateMachine: Type: AWS::Serverless::StateMachine # More info about State Machine Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-statemachine.html Properties: - DefinitionUri: statemachine/txma_redshift_consolidated_schema_processing.asl + DefinitionUri: statemachine/txma_redshift_consolidated_schema_processing.asl.json Name: !Sub ${Environment}-dap-consolidated-stage-layer-to-redshift-processing Role: !GetAtt StepFunctionRedshiftProcessRole.Arn Logging: