From 6f5c03ee489f0fe22a2cd16bf375b4993a95f05f Mon Sep 17 00:00:00 2001 From: shashank-reddy-nr Date: Tue, 16 Jul 2024 18:56:20 +0530 Subject: [PATCH 1/5] fix(golden-alerts): updated legacy alert conditions to nrql alerts in golden alerts module --- examples/modules/golden-signal-alerts/main.tf | 22 ++-- .../new-golden-signal-alerts/README.md | 90 +++++++++++++++ .../modules/new-golden-signal-alerts/main.tf | 103 ++++++++++++++++++ .../new-golden-signal-alerts/outputs.tf | 19 ++++ .../new-golden-signal-alerts/variables.tf | 16 +++ 5 files changed, 243 insertions(+), 7 deletions(-) create mode 100644 examples/modules/new-golden-signal-alerts/README.md create mode 100644 examples/modules/new-golden-signal-alerts/main.tf create mode 100644 examples/modules/new-golden-signal-alerts/outputs.tf create mode 100644 examples/modules/new-golden-signal-alerts/variables.tf diff --git a/examples/modules/golden-signal-alerts/main.tf b/examples/modules/golden-signal-alerts/main.tf index 0d0e1f3c9..eccb70a55 100644 --- a/examples/modules/golden-signal-alerts/main.tf +++ b/examples/modules/golden-signal-alerts/main.tf @@ -1,3 +1,11 @@ +terraform { + required_providers { + newrelic = { + source = "newrelic/newrelic" + } + } +} + data "newrelic_entity" "application" { name = var.service.name type = "APPLICATION" @@ -17,7 +25,7 @@ resource "newrelic_alert_condition" "response_time_web" { metric = "response_time_web" condition_scope = "application" - critical { + term { duration = var.service.duration threshold = var.service.response_time_threshold operator = "above" @@ -30,11 +38,11 @@ resource "newrelic_alert_condition" "throughput_web" { name = "Low Throughput (web)" type = "apm_app_metric" - entities = [data.newrelic_application.application.application_id] + entities = [data.newrelic_entity.application.application_id] metric = "throughput_web" condition_scope = "application" - critical { + term { duration = var.service.duration threshold = var.service.throughput_threshold operator = "below" @@ -47,11 +55,11 @@ resource "newrelic_alert_condition" "error_percentage" { name = "High Error Percentage" type = "apm_app_metric" - entities = [data.newrelic_application.application.application_id] + entities = [data.newrelic_entity.application.application_id] metric = "error_percentage" condition_scope = "application" - critical { + term { duration = var.service.duration threshold = var.service.error_percentage_threshold operator = "above" @@ -67,9 +75,9 @@ resource "newrelic_infra_alert_condition" "high_cpu" { event = "SystemSample" select = "cpuPercent" comparison = "above" - where = "(`applicationId` = '${data.newrelic_application.application.application_id}')" + where = "(`applicationId` = '${data.newrelic_entity.application.application_id}')" - critical { + term { duration = var.service.duration value = var.service.cpu_threshold time_function = "all" diff --git a/examples/modules/new-golden-signal-alerts/README.md b/examples/modules/new-golden-signal-alerts/README.md new file mode 100644 index 000000000..374c41397 --- /dev/null +++ b/examples/modules/new-golden-signal-alerts/README.md @@ -0,0 +1,90 @@ +# [Golden Signal Alerts](modules/new-golden-signal-alerts) +This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). + +The signals chosen for this module are: + +* *Latency*: High response time (seconds) +* *Traffic*: Low throughput (requests/minute) +* *Errors*: Error rate (errors/minute) +* *Saturation*: CPU utilization (percentage utilized) + +### Requirements +Applications making use of this module need to be reporting data into both APM and Infrastructure. + +### Input variables +The following input variables are accepted by the module: + +* `name`: The APM application name as reported to New Relic +* `threshold_duration`: The duration that the threshold must violate in order to create an incident, in seconds. +* `cpu_threshold`: The critical threshold of the CPU utilization condition, as a percentage +* `error_percentage_threshold`: The critical threshold of the error rate condition, in errors/second +* `response_time_threshold`: The critical threshold of the response time condition, in seconds +* `throughput_threshold`: The critical threshold of the throughput condition, in requests/second + +### Outputs +The following output values are provided by the module: + +* `policy_id`: The ID of the created alert policy +* `cpu_condition_id`: The ID of the created high CPU alert condition +* `error_percentage_condition_id`: The ID of the created error percentage alert condition +* `response_time_condition_id`: The ID of the created response time alert condition +* `throughput_condition_id`: The ID of the created throughput alert condition + + +### Example usage +```terraform + +data "newrelic_notification_destination" "webhook_destination" { + name = "Golden Signal Webhook Testing" +} + +# Resource +resource "newrelic_notification_channel" "webhook_notification_channel" { + name = "webhook-example" + type = "WEBHOOK" + destination_id = data.newrelic_notification_destination.webhook_destination.id + product = "IINT" + + property { + key = "payload" + value = "{\n\t\"name\": \"foo\"\n}" + label = "Payload Template" + } +} + +data "newrelic_notification_destination" "email_destination" { + name = "golden signals testing mail" +} + +resource "newrelic_notification_channel" "email_notification_channel" { + name = "email-example" + type = "EMAIL" + destination_id = data.newrelic_notification_destination.email_destination.id + product = "IINT" + + property { + key = "subject" + value = "New Subject Title" + } + + property { + key = "customDetailsEmail" + value = "issue id - {{issueId}}" + } +} + +module "webportal_alerts" { + source = "../examples/modules/new-golden-signal-alerts" // Need to change path according to your tf config file folder level, + // here given example source path is from assuming that your td config code in testing folder + notification_channel_ids = [newrelic_notification_channel.webhook_notification_channel.id, newrelic_notification_channel.email_notification_channel.id] + + service = { + name = "Dummy App Pro Max" + threshold_duration = 420 + cpu_threshold = 90 + response_time_threshold = 180 + error_percentage_threshold = 5 + throughput_threshold = 300 + } +} +``` diff --git a/examples/modules/new-golden-signal-alerts/main.tf b/examples/modules/new-golden-signal-alerts/main.tf new file mode 100644 index 000000000..db1245ab2 --- /dev/null +++ b/examples/modules/new-golden-signal-alerts/main.tf @@ -0,0 +1,103 @@ +terraform { + required_providers { + newrelic = { + source = "newrelic/newrelic" + } + } +} +data "newrelic_entity" "application" { + name = var.service.name + type = "APPLICATION" + domain = "APM" +} + +resource "newrelic_alert_policy" "golden_signal_policy" { + name = "Golden Signals - ${var.service.name}" +} + +resource "newrelic_nrql_alert_condition" "response_time_web" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High Response Time (web)" + + nrql { + query = "SELECT filter(average(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "above" + threshold = var.service.response_time_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "throughput_web" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "Low Throughput (web)" + + nrql { + query = "SELECT filter(count(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "below" + threshold = var.service.throughput_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "error_percentage" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High Error Percentage" + + + nrql { + query = "SELECT ((filter(count(newrelic.timeslice.value), where metricTimesliceName = 'Errors/all') / filter(count(newrelic.timeslice.value), WHERE metricTimesliceName IN ('HttpDispatcher', 'OtherTransaction/all'))) OR 0) * 100 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('Errors/all', 'HttpDispatcher', 'OtherTransaction/all', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "above" + threshold = var.service.error_percentage_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "high_cpu" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High CPU usage" + + nrql { + query = "SELECT average(cpuPercent) FROM SystemSample WHERE (`applicationId` = '${data.newrelic_entity.application.application_id}') FACET entityId" + } + + critical { + operator = "above" + threshold = var.service.cpu_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_workflow" "golden_signal_workflow" { + name = "Golden Signals Workflow ${var.service.name}" + muting_rules_handling = "NOTIFY_ALL_ISSUES" + + issues_filter { + name = " Golden signal policy Ids filter" + type = "FILTER" + + predicate { + attribute = "labels.policyIds" + operator = "EXACTLY_MATCHES" + values = [ newrelic_alert_policy.golden_signal_policy.id ] + } + } + dynamic "destination"{ + for_each = var.notification_channel_ids + content { + channel_id = destination.value + } + } +} \ No newline at end of file diff --git a/examples/modules/new-golden-signal-alerts/outputs.tf b/examples/modules/new-golden-signal-alerts/outputs.tf new file mode 100644 index 000000000..29ab70abe --- /dev/null +++ b/examples/modules/new-golden-signal-alerts/outputs.tf @@ -0,0 +1,19 @@ +output "policy_id" { + value = newrelic_alert_policy.golden_signal_policy.id +} + +output "response_time_condition_id" { + value = newrelic_nrql_alert_condition.response_time_web.id +} + +output "throughput_condition_id" { + value = newrelic_nrql_alert_condition.throughput_web.id +} + +output "error_percentage_condition_id" { + value = newrelic_nrql_alert_condition.error_percentage.id +} + +output "cpu_condition_id" { + value = newrelic_nrql_alert_condition.high_cpu.id +} diff --git a/examples/modules/new-golden-signal-alerts/variables.tf b/examples/modules/new-golden-signal-alerts/variables.tf new file mode 100644 index 000000000..931a72a8e --- /dev/null +++ b/examples/modules/new-golden-signal-alerts/variables.tf @@ -0,0 +1,16 @@ +variable "service" { + description = "The service to create alerts for" + type = object({ + name = string + threshold_duration = number + cpu_threshold = number + response_time_threshold = number + error_percentage_threshold = number + throughput_threshold = number + }) +} + +variable "notification_channel_ids" { + description = "The notification channel IDs to add to this policy" + type = list(string) +} From 7320006b52fa8cdd0dcd2ff5db98d8f5dbf7a950 Mon Sep 17 00:00:00 2001 From: shashank-reddy-nr Date: Fri, 19 Jul 2024 17:51:46 +0530 Subject: [PATCH 2/5] fix(golden-signal-alerts): minor changes to add few missing attributes --- .../README.md | 12 +- .../modules/golden-signal-alerts-new/main.tf | 103 ++++++++++++++++ .../outputs.tf | 0 .../golden-signal-alerts-new/providers.tf | 7 ++ .../golden-signal-alerts-new/variables.tf | 16 +++ .../modules/golden-signal-alerts/README.md | 6 +- examples/modules/golden-signal-alerts/main.tf | 116 ++++++++---------- .../modules/golden-signal-alerts/providers.tf | 7 ++ .../modules/golden-signal-alerts/variables.tf | 22 ++-- .../modules/new-golden-signal-alerts/main.tf | 103 ---------------- .../new-golden-signal-alerts/variables.tf | 16 --- 11 files changed, 209 insertions(+), 199 deletions(-) rename examples/modules/{new-golden-signal-alerts => golden-signal-alerts-new}/README.md (82%) create mode 100644 examples/modules/golden-signal-alerts-new/main.tf rename examples/modules/{new-golden-signal-alerts => golden-signal-alerts-new}/outputs.tf (100%) create mode 100644 examples/modules/golden-signal-alerts-new/providers.tf create mode 100644 examples/modules/golden-signal-alerts-new/variables.tf create mode 100644 examples/modules/golden-signal-alerts/providers.tf delete mode 100644 examples/modules/new-golden-signal-alerts/main.tf delete mode 100644 examples/modules/new-golden-signal-alerts/variables.tf diff --git a/examples/modules/new-golden-signal-alerts/README.md b/examples/modules/golden-signal-alerts-new/README.md similarity index 82% rename from examples/modules/new-golden-signal-alerts/README.md rename to examples/modules/golden-signal-alerts-new/README.md index 374c41397..1bcf7bfb3 100644 --- a/examples/modules/new-golden-signal-alerts/README.md +++ b/examples/modules/golden-signal-alerts-new/README.md @@ -17,7 +17,7 @@ The following input variables are accepted by the module: * `name`: The APM application name as reported to New Relic * `threshold_duration`: The duration that the threshold must violate in order to create an incident, in seconds. * `cpu_threshold`: The critical threshold of the CPU utilization condition, as a percentage -* `error_percentage_threshold`: The critical threshold of the error rate condition, in errors/second +* `error_percentage_threshold`: The critical threshold of the error rate condition, as a percentage * `response_time_threshold`: The critical threshold of the response time condition, in seconds * `throughput_threshold`: The critical threshold of the throughput condition, in requests/second @@ -74,16 +74,18 @@ resource "newrelic_notification_channel" "email_notification_channel" { } module "webportal_alerts" { - source = "../examples/modules/new-golden-signal-alerts" // Need to change path according to your tf config file folder level, - // here given example source path is from assuming that your td config code in testing folder + // Please specify the path of the source of this module according to the location you've placed the module in. + // The path specified below assumes you're using this module from a clone of this repo, in the `newrelic.tf` file in the `testing` folder. + // However, if you'd like to use a remote version of this module (without a cloned version of this), the right value of the argument source would be "github.com/newrelic/terraform-provider-newrelic//examples/modules/golden-signal-alerts-new". + source = "../examples/modules/golden-signal-alerts-new" notification_channel_ids = [newrelic_notification_channel.webhook_notification_channel.id, newrelic_notification_channel.email_notification_channel.id] service = { name = "Dummy App Pro Max" threshold_duration = 420 cpu_threshold = 90 - response_time_threshold = 180 - error_percentage_threshold = 5 + response_time_threshold = 5 + error_percentage_threshold = 10 throughput_threshold = 300 } } diff --git a/examples/modules/golden-signal-alerts-new/main.tf b/examples/modules/golden-signal-alerts-new/main.tf new file mode 100644 index 000000000..ad4ec0c77 --- /dev/null +++ b/examples/modules/golden-signal-alerts-new/main.tf @@ -0,0 +1,103 @@ +data "newrelic_entity" "application" { + name = var.service.name + type = "APPLICATION" + domain = "APM" +} + +resource "newrelic_alert_policy" "golden_signal_policy" { + name = "Golden Signals - ${var.service.name}" +} + +resource "newrelic_nrql_alert_condition" "response_time_web" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High Response Time (web)" + fill_option = "static" + fill_value = 0 + + nrql { + query = "SELECT filter(average(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "above" + threshold = var.service.response_time_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "throughput_web" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "Low Throughput (web)" + fill_option = "static" + fill_value = 0 + + nrql { + query = "SELECT filter(count(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "below" + threshold = var.service.throughput_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "error_percentage" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High Error Percentage" + fill_option = "static" + fill_value = 0 + + nrql { + query = "SELECT ((filter(count(newrelic.timeslice.value), where metricTimesliceName = 'Errors/all') / filter(count(newrelic.timeslice.value), WHERE metricTimesliceName IN ('HttpDispatcher', 'OtherTransaction/all'))) OR 0) * 100 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('Errors/all', 'HttpDispatcher', 'OtherTransaction/all', 'Agent/MetricsReported/count') FACET appId" + } + + critical { + operator = "above" + threshold = var.service.error_percentage_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_nrql_alert_condition" "high_cpu" { + policy_id = newrelic_alert_policy.golden_signal_policy.id + name = "High CPU usage" + fill_option = "static" + fill_value = 0 + + nrql { + query = "SELECT average(cpuPercent) FROM SystemSample WHERE (`applicationId` = '${data.newrelic_entity.application.application_id}') FACET entityId" + } + + critical { + operator = "above" + threshold = var.service.cpu_threshold + threshold_duration = var.service.threshold_duration + threshold_occurrences = "all" + } +} + +resource "newrelic_workflow" "golden_signal_workflow" { + name = "Golden Signals Workflow ${var.service.name}" + muting_rules_handling = "NOTIFY_ALL_ISSUES" + + issues_filter { + name = " Golden signal policy Ids filter" + type = "FILTER" + + predicate { + attribute = "labels.policyIds" + operator = "EXACTLY_MATCHES" + values = [newrelic_alert_policy.golden_signal_policy.id] + } + } + dynamic "destination" { + for_each = var.notification_channel_ids + content { + channel_id = destination.value + } + } +} \ No newline at end of file diff --git a/examples/modules/new-golden-signal-alerts/outputs.tf b/examples/modules/golden-signal-alerts-new/outputs.tf similarity index 100% rename from examples/modules/new-golden-signal-alerts/outputs.tf rename to examples/modules/golden-signal-alerts-new/outputs.tf diff --git a/examples/modules/golden-signal-alerts-new/providers.tf b/examples/modules/golden-signal-alerts-new/providers.tf new file mode 100644 index 000000000..31a8cc075 --- /dev/null +++ b/examples/modules/golden-signal-alerts-new/providers.tf @@ -0,0 +1,7 @@ +terraform { + required_providers { + newrelic = { + source = "newrelic/newrelic" + } + } +} diff --git a/examples/modules/golden-signal-alerts-new/variables.tf b/examples/modules/golden-signal-alerts-new/variables.tf new file mode 100644 index 000000000..aa10b6c72 --- /dev/null +++ b/examples/modules/golden-signal-alerts-new/variables.tf @@ -0,0 +1,16 @@ +variable "service" { + description = "The service to create alerts for" + type = object({ + name = string + threshold_duration = number + cpu_threshold = number + response_time_threshold = number + error_percentage_threshold = number + throughput_threshold = number + }) +} + +variable "notification_channel_ids" { + description = "The notification channel IDs to add to this policy" + type = list(string) +} diff --git a/examples/modules/golden-signal-alerts/README.md b/examples/modules/golden-signal-alerts/README.md index f3abb8f62..d12b420c0 100644 --- a/examples/modules/golden-signal-alerts/README.md +++ b/examples/modules/golden-signal-alerts/README.md @@ -1,6 +1,8 @@ -# [Golden Signal Alerts](modules/golden-signal-alerts) +# [Golden Signal Alerts](modules/golden-signal-alerts) [Deprecated]: This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). +-> **WARNING:** The [golden-signal-alerts](modules/golden-signal-alerts) module is deprecated and will be removed in the next major release. The module [golden-signal-alerts-new](modules/golden-signal-alerts-new) would be a preferred alternative to `golden-signal-alerts`. + The signals chosen for this module are: * *Latency*: High response time (seconds) @@ -17,7 +19,7 @@ The following input variables are accepted by the module: * `name`: The APM application name as reported to New Relic * `duration`: The duration to evaluate the alert conditions over, in minutes * `cpu_threshold`: The critical threshold of the CPU utilization condition, as a percentage -* `error_percentage_threshold`: The critical threshold of the error rate condition, in errors/min +* `error_percentage_threshold`: The critical threshold of the error rate condition, as a percentage * `response_time_threshold`: The critical threshold of the response time condition, in seconds * `throughput_threshold`: The critical threshold of the throughput condition, in requests/min diff --git a/examples/modules/golden-signal-alerts/main.tf b/examples/modules/golden-signal-alerts/main.tf index eccb70a55..d381b9861 100644 --- a/examples/modules/golden-signal-alerts/main.tf +++ b/examples/modules/golden-signal-alerts/main.tf @@ -1,90 +1,82 @@ -terraform { - required_providers { - newrelic = { - source = "newrelic/newrelic" - } - } -} - data "newrelic_entity" "application" { - name = var.service.name - type = "APPLICATION" - domain = "APM" + name = var.service.name + type = "APPLICATION" + domain = "APM" } resource "newrelic_alert_policy" "golden_signal_policy" { - name = "Golden Signals - ${var.service.name}" + name = "Golden Signals - ${var.service.name}" } resource "newrelic_alert_condition" "response_time_web" { - policy_id = newrelic_alert_policy.golden_signal_policy.id + policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High Response Time (web)" - type = "apm_app_metric" - entities = [data.newrelic_entity.application.application_id] - metric = "response_time_web" - condition_scope = "application" + name = "High Response Time (web)" + type = "apm_app_metric" + entities = [data.newrelic_entity.application.application_id] + metric = "response_time_web" + condition_scope = "application" - term { - duration = var.service.duration - threshold = var.service.response_time_threshold - operator = "above" - time_function = "all" - } + term { + duration = var.service.duration + threshold = var.service.response_time_threshold + operator = "above" + time_function = "all" + } } resource "newrelic_alert_condition" "throughput_web" { - policy_id = newrelic_alert_policy.golden_signal_policy.id + policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "Low Throughput (web)" - type = "apm_app_metric" - entities = [data.newrelic_entity.application.application_id] - metric = "throughput_web" - condition_scope = "application" + name = "Low Throughput (web)" + type = "apm_app_metric" + entities = [data.newrelic_entity.application.application_id] + metric = "throughput_web" + condition_scope = "application" - term { - duration = var.service.duration - threshold = var.service.throughput_threshold - operator = "below" - time_function = "all" - } + term { + duration = var.service.duration + threshold = var.service.throughput_threshold + operator = "below" + time_function = "all" + } } resource "newrelic_alert_condition" "error_percentage" { - policy_id = newrelic_alert_policy.golden_signal_policy.id + policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High Error Percentage" - type = "apm_app_metric" - entities = [data.newrelic_entity.application.application_id] - metric = "error_percentage" - condition_scope = "application" + name = "High Error Percentage" + type = "apm_app_metric" + entities = [data.newrelic_entity.application.application_id] + metric = "error_percentage" + condition_scope = "application" - term { - duration = var.service.duration - threshold = var.service.error_percentage_threshold - operator = "above" - time_function = "all" - } + term { + duration = var.service.duration + threshold = var.service.error_percentage_threshold + operator = "above" + time_function = "all" + } } resource "newrelic_infra_alert_condition" "high_cpu" { - policy_id = newrelic_alert_policy.golden_signal_policy.id + policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High CPU usage" - type = "infra_metric" - event = "SystemSample" - select = "cpuPercent" - comparison = "above" - where = "(`applicationId` = '${data.newrelic_entity.application.application_id}')" + name = "High CPU usage" + type = "infra_metric" + event = "SystemSample" + select = "cpuPercent" + comparison = "above" + where = "(`applicationId` = '${data.newrelic_entity.application.application_id}')" - term { - duration = var.service.duration - value = var.service.cpu_threshold - time_function = "all" - } + critical { + duration = var.service.duration + value = var.service.cpu_threshold + time_function = "all" + } } resource "newrelic_alert_policy_channel" "alert_policy_channel" { - policy_id = newrelic_alert_policy.golden_signal_policy.id - channel_ids = var.alert_channel_ids + policy_id = newrelic_alert_policy.golden_signal_policy.id + channel_ids = var.alert_channel_ids } diff --git a/examples/modules/golden-signal-alerts/providers.tf b/examples/modules/golden-signal-alerts/providers.tf new file mode 100644 index 000000000..31a8cc075 --- /dev/null +++ b/examples/modules/golden-signal-alerts/providers.tf @@ -0,0 +1,7 @@ +terraform { + required_providers { + newrelic = { + source = "newrelic/newrelic" + } + } +} diff --git a/examples/modules/golden-signal-alerts/variables.tf b/examples/modules/golden-signal-alerts/variables.tf index 17449bdbe..abed7a591 100644 --- a/examples/modules/golden-signal-alerts/variables.tf +++ b/examples/modules/golden-signal-alerts/variables.tf @@ -1,16 +1,16 @@ variable "service" { - description = "The service to create alerts for" - type = object({ - name = string - duration = number - cpu_threshold = number - response_time_threshold = number - error_percentage_threshold = number - throughput_threshold = number - }) + description = "The service to create alerts for" + type = object({ + name = string + duration = number + cpu_threshold = number + response_time_threshold = number + error_percentage_threshold = number + throughput_threshold = number + }) } variable "alert_channel_ids" { - description = "The notification channel IDs to add to this policy" - type = list(number) + description = "The notification channel IDs to add to this policy" + type = list(number) } diff --git a/examples/modules/new-golden-signal-alerts/main.tf b/examples/modules/new-golden-signal-alerts/main.tf deleted file mode 100644 index db1245ab2..000000000 --- a/examples/modules/new-golden-signal-alerts/main.tf +++ /dev/null @@ -1,103 +0,0 @@ -terraform { - required_providers { - newrelic = { - source = "newrelic/newrelic" - } - } -} -data "newrelic_entity" "application" { - name = var.service.name - type = "APPLICATION" - domain = "APM" -} - -resource "newrelic_alert_policy" "golden_signal_policy" { - name = "Golden Signals - ${var.service.name}" -} - -resource "newrelic_nrql_alert_condition" "response_time_web" { - policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High Response Time (web)" - - nrql { - query = "SELECT filter(average(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" - } - - critical { - operator = "above" - threshold = var.service.response_time_threshold - threshold_duration = var.service.threshold_duration - threshold_occurrences = "all" - } -} - -resource "newrelic_nrql_alert_condition" "throughput_web" { - policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "Low Throughput (web)" - - nrql { - query = "SELECT filter(count(newrelic.timeslice.value), WHERE metricTimesliceName = 'HttpDispatcher') OR 0 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('HttpDispatcher', 'Agent/MetricsReported/count') FACET appId" - } - - critical { - operator = "below" - threshold = var.service.throughput_threshold - threshold_duration = var.service.threshold_duration - threshold_occurrences = "all" - } -} - -resource "newrelic_nrql_alert_condition" "error_percentage" { - policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High Error Percentage" - - - nrql { - query = "SELECT ((filter(count(newrelic.timeslice.value), where metricTimesliceName = 'Errors/all') / filter(count(newrelic.timeslice.value), WHERE metricTimesliceName IN ('HttpDispatcher', 'OtherTransaction/all'))) OR 0) * 100 FROM Metric WHERE appId IN (${data.newrelic_entity.application.application_id}) AND metricTimesliceName IN ('Errors/all', 'HttpDispatcher', 'OtherTransaction/all', 'Agent/MetricsReported/count') FACET appId" - } - - critical { - operator = "above" - threshold = var.service.error_percentage_threshold - threshold_duration = var.service.threshold_duration - threshold_occurrences = "all" - } -} - -resource "newrelic_nrql_alert_condition" "high_cpu" { - policy_id = newrelic_alert_policy.golden_signal_policy.id - name = "High CPU usage" - - nrql { - query = "SELECT average(cpuPercent) FROM SystemSample WHERE (`applicationId` = '${data.newrelic_entity.application.application_id}') FACET entityId" - } - - critical { - operator = "above" - threshold = var.service.cpu_threshold - threshold_duration = var.service.threshold_duration - threshold_occurrences = "all" - } -} - -resource "newrelic_workflow" "golden_signal_workflow" { - name = "Golden Signals Workflow ${var.service.name}" - muting_rules_handling = "NOTIFY_ALL_ISSUES" - - issues_filter { - name = " Golden signal policy Ids filter" - type = "FILTER" - - predicate { - attribute = "labels.policyIds" - operator = "EXACTLY_MATCHES" - values = [ newrelic_alert_policy.golden_signal_policy.id ] - } - } - dynamic "destination"{ - for_each = var.notification_channel_ids - content { - channel_id = destination.value - } - } -} \ No newline at end of file diff --git a/examples/modules/new-golden-signal-alerts/variables.tf b/examples/modules/new-golden-signal-alerts/variables.tf deleted file mode 100644 index 931a72a8e..000000000 --- a/examples/modules/new-golden-signal-alerts/variables.tf +++ /dev/null @@ -1,16 +0,0 @@ -variable "service" { - description = "The service to create alerts for" - type = object({ - name = string - threshold_duration = number - cpu_threshold = number - response_time_threshold = number - error_percentage_threshold = number - throughput_threshold = number - }) -} - -variable "notification_channel_ids" { - description = "The notification channel IDs to add to this policy" - type = list(string) -} From ab2d71a0d9cf8238b5bcd60091826166e05b750d Mon Sep 17 00:00:00 2001 From: shashank-reddy-nr Date: Fri, 19 Jul 2024 18:16:32 +0530 Subject: [PATCH 3/5] fix(golden-alerts-new): readMe file update --- examples/modules/golden-signal-alerts-new/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/modules/golden-signal-alerts-new/README.md b/examples/modules/golden-signal-alerts-new/README.md index 1bcf7bfb3..1295026f6 100644 --- a/examples/modules/golden-signal-alerts-new/README.md +++ b/examples/modules/golden-signal-alerts-new/README.md @@ -1,4 +1,4 @@ -# [Golden Signal Alerts](modules/new-golden-signal-alerts) +# [Golden Signal Alerts (New)](modules/golden-signal-alerts-new) This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). The signals chosen for this module are: From 1347d338886965a346acc7a9bc138de03fb1bb13 Mon Sep 17 00:00:00 2001 From: pranav-new-relic Date: Fri, 19 Jul 2024 21:10:50 +0530 Subject: [PATCH 4/5] docs(golden-signal-alerts-new): tiny corrections --- examples/modules/golden-signal-alerts-new/README.md | 2 +- .../modules/golden-signal-alerts-new/variables.tf | 2 +- examples/modules/golden-signal-alerts/README.md | 12 +++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/examples/modules/golden-signal-alerts-new/README.md b/examples/modules/golden-signal-alerts-new/README.md index 1295026f6..9dbf37e32 100644 --- a/examples/modules/golden-signal-alerts-new/README.md +++ b/examples/modules/golden-signal-alerts-new/README.md @@ -15,7 +15,7 @@ Applications making use of this module need to be reporting data into both APM a The following input variables are accepted by the module: * `name`: The APM application name as reported to New Relic -* `threshold_duration`: The duration that the threshold must violate in order to create an incident, in seconds. +* `threshold_duration`: The duration, in seconds, that the condition must violate the threshold before creating a violation. * `cpu_threshold`: The critical threshold of the CPU utilization condition, as a percentage * `error_percentage_threshold`: The critical threshold of the error rate condition, as a percentage * `response_time_threshold`: The critical threshold of the response time condition, in seconds diff --git a/examples/modules/golden-signal-alerts-new/variables.tf b/examples/modules/golden-signal-alerts-new/variables.tf index aa10b6c72..5f05601c3 100644 --- a/examples/modules/golden-signal-alerts-new/variables.tf +++ b/examples/modules/golden-signal-alerts-new/variables.tf @@ -11,6 +11,6 @@ variable "service" { } variable "notification_channel_ids" { - description = "The notification channel IDs to add to this policy" + description = "The IDs of notification channels to add to this policy" type = list(string) } diff --git a/examples/modules/golden-signal-alerts/README.md b/examples/modules/golden-signal-alerts/README.md index d12b420c0..44f1f7a49 100644 --- a/examples/modules/golden-signal-alerts/README.md +++ b/examples/modules/golden-signal-alerts/README.md @@ -1,7 +1,13 @@ -# [Golden Signal Alerts](modules/golden-signal-alerts) [Deprecated]: -This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). +# Module: Golden Signal Alerts [Deprecated]: + +**⚠ WARNING**: + +This module, [golden-signal-alerts](https://github.com/newrelic/terraform-provider-newrelic/tree/main/examples/modules/golden-signal-alerts), functions using multiple resources in the New Relic Terraform Provider that have been **deprecated** and will be removed in the next major release. These resources include `newrelic_alert_policy_channel`, `newrelic_infra_alert_condition`, and `newrelic_alert_condition`. --> **WARNING:** The [golden-signal-alerts](modules/golden-signal-alerts) module is deprecated and will be removed in the next major release. The module [golden-signal-alerts-new](modules/golden-signal-alerts-new) would be a preferred alternative to `golden-signal-alerts`. +To set up golden signal alerts using a similar module with newer alternatives to the legacy resources listed above, **please use the newer alternative to the module linked above, which has recently been added: [golden-signal-alerts-new](https://github.com/newrelic/terraform-provider-newrelic/tree/main/examples/modules/golden-signal-alerts-new)**. +______ + +This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). The signals chosen for this module are: From 06216cd56520fd50cd79386831e68c050a1d5e83 Mon Sep 17 00:00:00 2001 From: pranav-new-relic Date: Fri, 19 Jul 2024 21:15:35 +0530 Subject: [PATCH 5/5] docs(golden-signal-alerts-new): tiny header change --- examples/modules/golden-signal-alerts-new/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/modules/golden-signal-alerts-new/README.md b/examples/modules/golden-signal-alerts-new/README.md index 9dbf37e32..115dcf166 100644 --- a/examples/modules/golden-signal-alerts-new/README.md +++ b/examples/modules/golden-signal-alerts-new/README.md @@ -1,4 +1,4 @@ -# [Golden Signal Alerts (New)](modules/golden-signal-alerts-new) +# Module: Golden Signal Alerts [New]: This module encapsulates an alerting strategy based on the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) introduced in Google’s widely read book on [Site Reliability Engineering](https://landing.google.com/sre/sre-book/toc/index.html). The signals chosen for this module are: