From d5cd77b3efc3f4b764385987274e199df77ca65d Mon Sep 17 00:00:00 2001 From: Anat Balzam <13421451+anatbal@users.noreply.github.com> Date: Mon, 20 Nov 2023 17:45:38 +0200 Subject: [PATCH] Enable malware scanner - Defender for Storage (#3784) * enable defender for storage on TRE core * allow malware scanning only for import * cr comments * cr comments * fix deletion issue * add docs on malware scanning * disable sensitiveDataDiscovery --------- Co-authored-by: anatbal --- .../BlobCreatedTrigger/__init__.py | 7 ++- .../ScanResultTrigger/__init__.py | 8 ++-- airlock_processor/_version.py | 2 +- config.sample.yaml | 2 +- core/terraform/.terraform.lock.hcl | 20 ++++++++ core/terraform/airlock/eventgrid_topics.tf | 47 +++++++++++++++++++ core/terraform/airlock/locals.tf | 2 + core/terraform/airlock/main.tf | 4 ++ core/terraform/airlock/storage_accounts.tf | 26 ++++++++++ core/terraform/main.tf | 8 ++++ core/terraform/variables.tf | 10 ++-- core/version.txt | 2 +- docs/azure-tre-overview/airlock.md | 3 +- docs/tre-admins/environment-variables.md | 2 +- 14 files changed, 130 insertions(+), 13 deletions(-) diff --git a/airlock_processor/BlobCreatedTrigger/__init__.py b/airlock_processor/BlobCreatedTrigger/__init__.py index b55d1ac583..419b0d46af 100644 --- a/airlock_processor/BlobCreatedTrigger/__init__.py +++ b/airlock_processor/BlobCreatedTrigger/__init__.py @@ -32,10 +32,11 @@ def main(msg: func.ServiceBusMessage, logging.error("environment variable 'ENABLE_MALWARE_SCANNING' does not exists. Cannot continue.") raise - if enable_malware_scanning: + if enable_malware_scanning and constants.STORAGE_ACCOUNT_NAME_IMPORT_INPROGRESS in topic: # If malware scanning is enabled, the fact that the blob was created can be dismissed. # It will be consumed by the malware scanning service logging.info('Malware scanning is enabled. no action to perform.') + send_delete_event(dataDeletionEvent, json_body, request_id) return else: logging.info('Malware scanning is disabled. 
Completing the submitted stage (moving to in_review).') @@ -66,6 +67,10 @@ def main(msg: func.ServiceBusMessage, event_time=datetime.datetime.utcnow(), data_version=constants.STEP_RESULT_EVENT_DATA_VERSION)) + send_delete_event(dataDeletionEvent, json_body, request_id) + + +def send_delete_event(dataDeletionEvent: func.Out[func.EventGridOutputEvent], json_body, request_id): # check blob metadata to find the blob it was copied from blob_client = get_blob_client_from_blob_info( *get_blob_info_from_topic_and_subject(topic=json_body["topic"], subject=json_body["subject"])) diff --git a/airlock_processor/ScanResultTrigger/__init__.py b/airlock_processor/ScanResultTrigger/__init__.py index 32909921f0..0d1891b5a8 100644 --- a/airlock_processor/ScanResultTrigger/__init__.py +++ b/airlock_processor/ScanResultTrigger/__init__.py @@ -14,7 +14,7 @@ def main(msg: func.ServiceBusMessage, logging.info("Python ServiceBus queue trigger processed message - Malware scan result arrived!") body = msg.get_body().decode('utf-8') - logging.info('Python ServiceBus queue trigger processed message: %s', body) + logging.info(f'Python ServiceBus queue trigger processed message: {body}') status_message = None try: @@ -34,7 +34,7 @@ def main(msg: func.ServiceBusMessage, try: json_body = json.loads(body) blob_uri = json_body["data"]["blobUri"] - verdict = json_body["data"]["verdict"] + verdict = json_body["data"]["scanResultType"] except KeyError as e: logging.error("body was not as expected {}", e) raise e @@ -46,10 +46,10 @@ def main(msg: func.ServiceBusMessage, # Otherwise, move the request to the blocked stage completed_step = constants.STAGE_SUBMITTED if verdict == constants.NO_THREATS: - logging.info('No malware were found in request id %s, moving to %s stage', request_id, constants.STAGE_IN_REVIEW) + logging.info(f'No malware were found in request id {request_id}, moving to {constants.STAGE_IN_REVIEW} stage') new_status = constants.STAGE_IN_REVIEW else: - logging.info('Malware was found in 
request id %s, moving to %s stage', request_id, constants.STAGE_BLOCKING_INPROGRESS) + logging.info(f'Malware was found in request id {request_id}, moving to {constants.STAGE_BLOCKING_INPROGRESS} stage') new_status = constants.STAGE_BLOCKING_INPROGRESS status_message = verdict diff --git a/airlock_processor/_version.py b/airlock_processor/_version.py index ed9d4d87b6..49e0fc1e09 100644 --- a/airlock_processor/_version.py +++ b/airlock_processor/_version.py @@ -1 +1 @@ -__version__ = "0.7.4" +__version__ = "0.7.0" diff --git a/config.sample.yaml b/config.sample.yaml index af790054a8..403c70c874 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -31,7 +31,7 @@ tre: core_app_service_plan_sku: P1v2 resource_processor_vmss_sku: Standard_B2s enable_swagger: true - enable_airlock_malware_scanning: false + enable_airlock_malware_scanning: true # TODO: move to RP default with https://github.com/microsoft/AzureTRE/issues/2948 workspace_app_service_plan_sku: P1v2 diff --git a/core/terraform/.terraform.lock.hcl b/core/terraform/.terraform.lock.hcl index f8c3f1b7c8..ea690ac29e 100644 --- a/core/terraform/.terraform.lock.hcl +++ b/core/terraform/.terraform.lock.hcl @@ -1,6 +1,26 @@ # This file is maintained automatically by "terraform init". # Manual edits may be lost in future updates. 
+provider "registry.terraform.io/azure/azapi" { + version = "1.9.0" + constraints = "1.9.0" + hashes = [ + "h1:zaLH2Owmj61RX2G1Cy6VDy8Ttfzx+lDsSCyiu5cXkm4=", + "zh:349569471fbf387feaaf8b88da1690669e201147c342f905e5eb03df42b3cf87", + "zh:54346d5fb78cbad3eb7cfd96e1dd7ce4f78666cabaaccfec6ee9437476330018", + "zh:64b799da915ea3a9a58ac7a926c6a31c59fd0d911687804d8e815eda88c5580b", + "zh:9336ed9e112555e0fda8af6be9ba21478e30117d79ba662233311d9560d2b7c6", + "zh:a8aace9897b28ea0b2dbd7a3be3df033e158af40412c9c7670be0956f216ed7e", + "zh:ab23df7de700d9e785009a4ca9ceb38ae1ab894a13f5788847f15d018556f415", + "zh:b4f13f0b13560a67d427c71c85246f8920f98987120341830071df4535842053", + "zh:e58377bf36d8a14d28178a002657865ee17446182dac03525fd43435e41a1b5c", + "zh:ea5db4acc6413fd0fe6b35981e58cdc9850f5f3118031cc3d2581de511aee6aa", + "zh:f0b32c06c6bd4e4af2c02a62be07b947766aeeb09289a03f21aba16c2fd3c60f", + "zh:f1518e766a90c257d7eb36d360dafaf311593a4a9352ff8db0bcfe0ed8cf45ae", + "zh:fa89e84cff0776b5b61ff27049b1d8ed52040bd58c81c4628890d644a6fb2989", + ] +} + provider "registry.terraform.io/hashicorp/azurerm" { version = "3.74.0" constraints = ">= 3.8.0, >= 3.16.0, 3.74.0" diff --git a/core/terraform/airlock/eventgrid_topics.tf b/core/terraform/airlock/eventgrid_topics.tf index 5bbe26e219..991325d2e1 100644 --- a/core/terraform/airlock/eventgrid_topics.tf +++ b/core/terraform/airlock/eventgrid_topics.tf @@ -161,6 +161,36 @@ resource "azurerm_private_endpoint" "eg_data_deletion" { } } +resource "azurerm_eventgrid_topic" "scan_result" { + count = var.enable_malware_scanning ? 
1 : 0 + name = local.scan_result_topic_name + location = var.location + resource_group_name = var.resource_group_name + # This is mandatory for the scan result to be published since private networks are not supported yet + public_network_access_enabled = true + + identity { + type = "SystemAssigned" + } + + tags = merge(var.tre_core_tags, { + Publishers = "Airlock Processor;" + }) + + lifecycle { ignore_changes = [tags] } +} + +resource "azurerm_role_assignment" "servicebus_sender_scan_result" { + count = var.enable_malware_scanning ? 1 : 0 + scope = var.airlock_servicebus.id + role_definition_name = "Azure Service Bus Data Sender" + principal_id = azurerm_eventgrid_topic.scan_result[0].identity[0].principal_id + + depends_on = [ + azurerm_eventgrid_topic.scan_result + ] +} + # System topic resource "azurerm_eventgrid_system_topic" "import_inprogress_blob_created" { name = local.import_inprogress_sys_topic_name @@ -395,6 +425,23 @@ resource "azurerm_eventgrid_event_subscription" "data_deletion" { ] } +resource "azurerm_eventgrid_event_subscription" "scan_result" { + count = var.enable_malware_scanning ? 
1 : 0 + name = local.scan_result_eventgrid_subscription_name + scope = azurerm_eventgrid_topic.scan_result[0].id + + service_bus_queue_endpoint_id = azurerm_servicebus_queue.scan_result.id + + delivery_identity { + type = "SystemAssigned" + } + + depends_on = [ + azurerm_eventgrid_topic.scan_result, + azurerm_role_assignment.servicebus_sender_scan_result + ] +} + resource "azurerm_eventgrid_event_subscription" "import_inprogress_blob_created" { name = local.import_inprogress_eventgrid_subscription_name scope = azurerm_storage_account.sa_import_in_progress.id diff --git a/core/terraform/airlock/locals.tf b/core/terraform/airlock/locals.tf index 6a108f79a9..37f7fdead0 100644 --- a/core/terraform/airlock/locals.tf +++ b/core/terraform/airlock/locals.tf @@ -23,6 +23,7 @@ locals { status_changed_topic_name = "evgt-airlock-status-changed-${local.topic_name_suffix}" notification_topic_name = "evgt-airlock-notification-${local.topic_name_suffix}" data_deletion_topic_name = "evgt-airlock-data-deletion-${local.topic_name_suffix}" + scan_result_topic_name = "evgt-airlock-scan-result-${local.topic_name_suffix}" step_result_queue_name = "airlock-step-result" status_changed_queue_name = "airlock-status-changed" @@ -35,6 +36,7 @@ locals { step_result_eventgrid_subscription_name = "evgs-airlock-update-status" status_changed_eventgrid_subscription_name = "evgs-airlock-status-changed" data_deletion_eventgrid_subscription_name = "evgs-airlock-data-deletion" + scan_result_eventgrid_subscription_name = "evgs-airlock-scan-result" import_inprogress_eventgrid_subscription_name = "evgs-airlock-import-in-progress-blob-created" import_rejected_eventgrid_subscription_name = "evgs-airlock-import-rejected-blob-created" import_blocked_eventgrid_subscription_name = "evgs-airlock-import-blocked-blob-created" diff --git a/core/terraform/airlock/main.tf b/core/terraform/airlock/main.tf index 89542d8ef7..cea72a09e8 100644 --- a/core/terraform/airlock/main.tf +++ b/core/terraform/airlock/main.tf @@ 
-5,6 +5,10 @@ terraform { source = "hashicorp/azurerm" version = ">= 3.16" } + azapi = { + source = "Azure/azapi" + version = ">= 1.9.0" + } local = { source = "hashicorp/local" version = ">= 2.2" diff --git a/core/terraform/airlock/storage_accounts.tf b/core/terraform/airlock/storage_accounts.tf index 3bfa4c7cdf..6a76b97f02 100644 --- a/core/terraform/airlock/storage_accounts.tf +++ b/core/terraform/airlock/storage_accounts.tf @@ -111,6 +111,32 @@ resource "azurerm_storage_account" "sa_import_in_progress" { lifecycle { ignore_changes = [tags] } } + +# Enable Airlock Malware Scanning on Core TRE +resource "azapi_resource_action" "enable_defender_for_storage" { + count = var.enable_malware_scanning ? 1 : 0 + type = "Microsoft.Security/defenderForStorageSettings@2022-12-01-preview" + resource_id = "${azurerm_storage_account.sa_import_in_progress.id}/providers/Microsoft.Security/defenderForStorageSettings/current" + method = "PUT" + + body = jsonencode({ + properties = { + isEnabled = true + malwareScanning = { + onUpload = { + isEnabled = true + capGBPerMonth = 5000 + }, + scanResultsEventGridTopicResourceId = azurerm_eventgrid_topic.scan_result[0].id + } + sensitiveDataDiscovery = { + isEnabled = false + } + overrideSubscriptionLevelSettings = true + } + }) +} + resource "azurerm_private_endpoint" "stg_import_inprogress_pe" { name = "pe-stg-import-inprogress-blob-${var.tre_id}" location = var.location diff --git a/core/terraform/main.tf b/core/terraform/main.tf index 3727a8ab01..a065e9fb8c 100644 --- a/core/terraform/main.tf +++ b/core/terraform/main.tf @@ -17,11 +17,19 @@ terraform { source = "hashicorp/http" version = "~> 3.2.0" } + azapi = { + source = "Azure/azapi" + version = "~> 1.9.0" + } } backend "azurerm" {} } +provider "azapi" { + use_msi = var.arm_use_msi +} + provider "azurerm" { features { key_vault { diff --git a/core/terraform/variables.tf b/core/terraform/variables.tf index 5766776051..4ccf9b8428 100644 --- a/core/terraform/variables.tf +++ 
b/core/terraform/variables.tf @@ -130,6 +130,13 @@ variable "arm_environment" { description = "Used as an environment variable in the VMSS to set the Azure cloud for Terraform" } +variable "arm_use_msi" { + type = bool + default = false + description = "Used as an environment variable to determine if Terraform should use a managed identity" +} + + variable "stateful_resources_locked" { type = bool default = true @@ -161,9 +168,6 @@ variable "public_deployment_ip_address" { default = "" } -# Important note: it is NOT enough to simply enable the malware scanning on. Further, manual, steps are required -# in order to actually set up the scanner. Setting this property to True without supplying a scanner will result -# in airlock requests being stuck in the in-progress stage. variable "enable_airlock_malware_scanning" { type = bool default = false diff --git a/core/version.txt b/core/version.txt index 8d8e3b7702..3e2f46a3a3 100644 --- a/core/version.txt +++ b/core/version.txt @@ -1 +1 @@ -__version__ = "0.8.9" +__version__ = "0.9.0" diff --git a/docs/azure-tre-overview/airlock.md b/docs/azure-tre-overview/airlock.md index 7e4d86c707..791b024e71 100644 --- a/docs/azure-tre-overview/airlock.md +++ b/docs/azure-tre-overview/airlock.md @@ -66,7 +66,8 @@ This storage location is external for import (`stalimex`) or internal for export The user will be able to upload a file to the provided storage location, using any tool of their preference: [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) or [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10) which is a command line tool. The user Submits the request (TRE API call) starting the data movement (to the `stalimip` - import in-progress or `stalexip` - export in-progress). The airlock request is now in state **Submitted**. -If enabled, the Security Scanning is started. 
In the case that security flaws are found, the request state becomes **Blocking In-progress** while the data is moved to blocked storage (either import blocked `stalimblocked` or export blocked `stalexblocked`). In this case, the request is finalized with the state **Blocked By Scan**. +If enabled, the Malware Scanning is started. The scan is done using Microsoft Defender for Storage, which is described in detail [here](https://learn.microsoft.com/en-us/azure/defender-for-cloud/defender-for-storage-introduction). +In the case that security flaws are found, the request state becomes **Blocking In-progress** while the data is moved to blocked storage (either import blocked `stalimblocked` or export blocked `stalexblocked`). In this case, the request is finalized with the state **Blocked By Scan**. If the Security Scanning does not identify any security flaws, the request state becomes **In-Review**. Simultaneously, a notification is sent to the Airlock Manager user. The user needs to ask for the container URL using the TRE API (SAS token + URL with READ permission). > The Security Scanning can be disabled, changing the request state from **Submitted** straight to **In-Review**. diff --git a/docs/tre-admins/environment-variables.md b/docs/tre-admins/environment-variables.md index a998198d81..0b56fd92b2 100644 --- a/docs/tre-admins/environment-variables.md +++ b/docs/tre-admins/environment-variables.md @@ -31,7 +31,7 @@ | `API_CLIENT_ID` | Generated when following [pre-deployment steps](./setup-instructions/setup-auth-entities.md) guide. Client id of the "TRE API". | | `API_CLIENT_SECRET` | Generated when following [pre-deployment steps](./setup-instructions/setup-auth-entities.md) guide. Client secret of the "TRE API". | | `STATEFUL_RESOURCES_LOCKED` | If set to `false` locks on stateful resources won't be created. A recommended setting for developers. | -| `ENABLE_AIRLOCK_MALWARE_SCANNING` | If False, Airlock requests will skip the malware scanning stage. 
If set to True, Setting up a scanner manually is required! | +| `ENABLE_AIRLOCK_MALWARE_SCANNING` | If False, Airlock requests will skip the malware scanning stage. If set to True, Defender for Storage will be enabled. | | `ENABLE_LOCAL_DEBUGGING` | Set to `false` by default. Setting this to `true` will ensure that Azure resources are accessible from your local development machine. (e.g. ServiceBus and Cosmos) | | `PUBLIC_DEPLOYMENT_IP_ADDRESS` | The public IP address of the machine that is deploying TRE. (Your desktop or the build agents). In certain locations a dynamic script to retrieve this from [https://ipecho.net/plain](https://ipecho.net/plain) does not work. If this is the case, then you can 'hardcode' your IP. | | `RESOURCE_PROCESSOR_VMSS_SKU` | The SKU of the VMMS to use for the resource processing VM. |