Skip to content

Commit

Permalink
No int test/dac 1919 raw layer single table (#479)
Browse files Browse the repository at this point in the history
* raw layer events single table schema

* raw layer events single table schema

* created state machine definition

* created state machine resource

* created state machine resource
  • Loading branch information
sbeesla-gds authored Dec 20, 2023
1 parent 88aa440 commit 4e0f700
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 0 deletions.
17 changes: 17 additions & 0 deletions iac/main/resources/raw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -910,3 +910,20 @@ RawGlueDatabase:
CatalogId: !Sub ${AWS::AccountId}
DatabaseInput:
Name: !Sub ${Environment}-${RawGlueDatabaseName}

# Glue crawler over the TxMA raw-layer events; its Configuration asks Glue to
# combine compatible schemas when grouping tables.
RawLayerSingleTableCrawler:
  Type: AWS::Glue::Crawler
  Properties:
    Name: txma_raw_layer_events_schema_combined
    # Catalogue the results into the raw Glue database, using the shared
    # crawler role and security configuration referenced below.
    DatabaseName: !Ref RawGlueDatabase
    Role: !GetAtt RawGlueCrawlerRole.Arn
    CrawlerSecurityConfiguration: !Ref GlueSecurityConfig
    # Single S3 target: the txma/ prefix of the raw layer bucket.
    Targets:
      S3Targets:
        - Path: !Sub 's3://${RawLayerBucket}/txma/'
    # Re-crawl the whole target on every run.
    RecrawlPolicy:
      RecrawlBehavior: CRAWL_EVERYTHING
    # Schema changes are written to the catalogue; deletions are removed from it.
    SchemaChangePolicy:
      DeleteBehavior: DELETE_FROM_DATABASE
      UpdateBehavior: UPDATE_IN_DATABASE
    # Crawler configuration JSON: partitions inherit from the table and
    # compatible schemas are combined.
    Configuration: '{"Version":1,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"}}, "Grouping": {"TableGroupingPolicy": "CombineCompatibleSchemas"}}'
22 changes: 22 additions & 0 deletions iac/main/resources/state-machine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ StepFunctionRole:
Resource:
- !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:${Environment}-dap-raw-to-stage-process'
- !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:${Environment}-dap-redshift-processing'
- !Sub 'arn:aws:states:${AWS::Region}:${AWS::AccountId}:stateMachine:${Environment}-dap-txma-raw-consolidated-schema-to-stage-process'
Action:
- states:ListExecutions
- states:StartExecution
Expand Down Expand Up @@ -660,3 +661,24 @@ StepFunctionRedshiftProcessRole:
- redshift-data:GetStatementResult
- redshift-data:DescribeStatement
- redshift-data:ListStatements

# SAM state machine for the TxMA raw-layer consolidated-schema processing.
# The workflow itself lives in the ASL file referenced by DefinitionUri.
TxmaRawLayerConsolidatedSchemaProcessingStateMachine:
  Type: AWS::Serverless::StateMachine # More info about State Machine Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-statemachine.html
  Properties:
    DefinitionUri: statemachine/txma_raw_layer_consolidated_schema_processing.asl.json
    Name: !Sub ${Environment}-dap-txma-raw-consolidated-schema-to-stage-process
    Role: !GetAtt StepFunctionRole.Arn
    # Log every state transition, including execution data, to the shared
    # raw-layer processing log group.
    Logging:
      Destinations:
        - CloudWatchLogsLogGroup:
            LogGroupArn: !GetAtt AthenaRawLayerProcessingLogGroup.Arn
      IncludeExecutionData: true
      Level: ALL
    # Substituted into the ${GlueCrawlerRawName} placeholder of the ASL file.
    DefinitionSubstitutions:
      GlueCrawlerRawName: !Ref RawLayerSingleTableCrawler
    Events:
      DailySchedule:
        Type: Schedule # More info about Schedule Event Source: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-property-statemachine-schedule.html
        Properties:
          # Disable the schedule in test environments. `!Not IsTest` is not a
          # valid property value (Fn::Not takes a list and belongs in
          # Conditions), so the rule state is selected with Fn::If instead.
          # NOTE(review): assumes IsTest is a Condition declared in this
          # template - confirm.
          State: !If [IsTest, DISABLED, ENABLED]
          # Runs daily at 06:00 (EventBridge cron expressions are in UTC).
          Schedule: cron(0 6 * * ? *)
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{
  "Comment": "AWS Step Functions state machine that processes raw datasets (consolidated schema) from TxMA into the staging layer of Athena",
  "StartAt": "ListExecutions",
  "States": {
    "ListExecutions": {
      "Type": "Task",
      "Next": "ValidateRunningInstances",
      "Parameters": {
        "StateMachineArn.$": "$$.StateMachine.Id",
        "StatusFilter": "RUNNING"
      },
      "Resource": "arn:aws:states:::aws-sdk:sfn:listExecutions",
      "ResultSelector": {
        "runningExecutionsCount.$": "States.ArrayLength($.Executions)"
      }
    },
    "ValidateRunningInstances": {
      "Type": "Choice",
      "Choices": [
        {
          "Variable": "$.runningExecutionsCount",
          "NumericGreaterThan": 1,
          "Next": "RunningInstanceDetected"
        }
      ],
      "Default": "StartRawCrawler"
    },
    "RunningInstanceDetected": {
      "Type": "Fail",
      "Error": "RunningInstanceDetected"
    },
    "StartRawCrawler": {
      "Type": "Task",
      "Resource": "arn:aws:states:::aws-sdk:glue:startCrawler",
      "Parameters": {
        "Name": "${GlueCrawlerRawName}"
      },
      "Next": "MonitorRawCrawlerStatus",
      "ResultPath": "$.rawCrawlerResult"
    },
    "MonitorRawCrawlerStatus": {
      "Type": "Task",
      "Resource": "arn:aws:states:::aws-sdk:glue:getCrawler",
      "Parameters": {
        "Name": "${GlueCrawlerRawName}"
      },
      "ResultPath": "$.crawlerRawStatus",
      "Next": "CheckRawCrawlerStatus"
    },
    "CheckRawCrawlerStatus": {
      "Type": "Choice",
      "Choices": [
        {
          "And": [
            {
              "Variable": "$.crawlerRawStatus.Crawler.State",
              "StringEquals": "READY"
            },
            {
              "Variable": "$.crawlerRawStatus.Crawler.LastCrawl.Status",
              "IsPresent": true
            },
            {
              "Variable": "$.crawlerRawStatus.Crawler.LastCrawl.Status",
              "StringEquals": "SUCCEEDED"
            }
          ],
          "Next": "RawCrawlerFinished"
        },
        {
          "Or": [
            {
              "Variable": "$.crawlerRawStatus.Crawler.State",
              "StringEquals": "RUNNING"
            },
            {
              "Variable": "$.crawlerRawStatus.Crawler.State",
              "StringEquals": "STOPPING"
            },
            {
              "Variable": "$.crawlerRawStatus.Crawler.State",
              "StringEquals": "STARTED"
            }
          ],
          "Next": "WaitForRawCrawler"
        }
      ],
      "Default": "CrawlerFailed"
    },
    "WaitForRawCrawler": {
      "Type": "Wait",
      "Seconds": 30,
      "Next": "MonitorRawCrawlerStatus"
    },
    "RawCrawlerFinished": {
      "Type": "Pass",
      "Next": "StopProcessing"
    },
    "StopProcessing": {
      "Type": "Pass",
      "End": true
    },
    "CrawlerFailed": {
      "Type": "Fail",
      "Error": "CrawlerFailed",
      "Cause": "The Glue crawler execution failed."
    }
  }
}

0 comments on commit 4e0f700

Please sign in to comment.