From 304170e52297a14ebb035332abdd36d5d508379f Mon Sep 17 00:00:00 2001 From: Mike Patrick Date: Tue, 21 Nov 2023 10:43:39 +0000 Subject: [PATCH] Build out the publishing latency SLI * Fetches Whitehall events from Logit * Fetches Content Store events from Logit * Turns those events into metrics * Sends those metrics to Prometheus's PushGateway In case it's not obvious what's happening with the `OFFSET_MINUTES` and `INTERVAL_MINUTES` here, I've explained the motivation for `from_time` and `to_time` in the commits that introduced `WhitehallEvents` and `ContentStoreEvents`. --- .../publishing_latency_sli.rb | 61 ++++++- .../publishing_latency_sli/record_metrics.rb | 2 - .../publishing_latency_sli_spec.rb | 155 +++++++++++++++++- 3 files changed, 213 insertions(+), 5 deletions(-) diff --git a/lib/govuk_sli_collector/publishing_latency_sli.rb b/lib/govuk_sli_collector/publishing_latency_sli.rb index 4be4b61..f322725 100644 --- a/lib/govuk_sli_collector/publishing_latency_sli.rb +++ b/lib/govuk_sli_collector/publishing_latency_sli.rb @@ -1,5 +1,64 @@ +require "prometheus/client" +require "prometheus/client/push" + +require "govuk_sli_collector/publishing_latency_sli/content_store_events" +require "govuk_sli_collector/publishing_latency_sli/logit_search" +require "govuk_sli_collector/publishing_latency_sli/record_metrics" +require "govuk_sli_collector/publishing_latency_sli/whitehall_events" + module GovukSliCollector class PublishingLatencySli - def call; end + def initialize + @logit_search = LogitSearch.new( + host: ENV.fetch("LOGIT_OPENSEARCH_HOST"), + basic_auth: ENV.fetch("LOGIT_OPENSEARCH_BASIC_AUTH"), + ) + @prometheus_pushgateway_url = ENV.fetch("PROMETHEUS_PUSHGATEWAY_URL") + @to_time = minutes_ago(Integer(ENV.fetch("OFFSET_MINUTES"))) + @from_time = @to_time - minutes(Integer(ENV.fetch("INTERVAL_MINUTES"))) + end + + def call + whitehall_events = WhitehallEvents.new(logit_search:).call( + from_time:, + to_time:, + ) + + return if whitehall_events.empty? + + content_store_events = ContentStoreEvents.new(logit_search:).call( + from_time:, + matching: whitehall_events, + ) + + return if content_store_events.empty? + + prometheus_registry = Prometheus::Client.registry + + RecordMetrics.new(prometheus_registry:).call( + whitehall_events:, + content_store_events:, + ) + + Prometheus::Client::Push.new( + job: "govuk_sli_collector_publishing_latency_sli", + gateway: prometheus_pushgateway_url, + ).add(prometheus_registry) + end + + private + + attr_reader :logit_search, + :prometheus_pushgateway_url, + :from_time, + :to_time + + def minutes(number_of) + number_of * 60 + end + + def minutes_ago(number_of) + Time.now.utc - minutes(number_of) + end end end diff --git a/lib/govuk_sli_collector/publishing_latency_sli/record_metrics.rb b/lib/govuk_sli_collector/publishing_latency_sli/record_metrics.rb index 3c1e17d..1c2d326 100644 --- a/lib/govuk_sli_collector/publishing_latency_sli/record_metrics.rb +++ b/lib/govuk_sli_collector/publishing_latency_sli/record_metrics.rb @@ -1,5 +1,3 @@ -require "prometheus/client" - module GovukSliCollector class PublishingLatencySli class RecordMetrics diff --git a/spec/govuk_sli_collector/publishing_latency_sli_spec.rb b/spec/govuk_sli_collector/publishing_latency_sli_spec.rb index fed1d38..2666a84 100644 --- a/spec/govuk_sli_collector/publishing_latency_sli_spec.rb +++ b/spec/govuk_sli_collector/publishing_latency_sli_spec.rb @@ -2,10 +2,161 @@ module GovukSliCollector RSpec.describe PublishingLatencySli do - it "requires no arguments to run" do + let(:logit_opensearch_host) { "https://example.logit.io" } + let(:pushgateway) { instance_spy(Prometheus::Client::Push) } + + before do + allow(Prometheus::Client::Push).to receive(:new).and_return(pushgateway) + end + + after do + Prometheus::Client.registry.metrics.each do |metric| + Prometheus::Client.registry.unregister(metric.name) + end + end + + it "requires some environment variables to run" do expect { + described_class.new + }.to raise_error(KeyError) + end + + it "derives metrics from log data and pushes them to Prometheus" do + stub_whitehall_logs_api + stub_content_store_logs_api + + ClimateControl.modify( + INTERVAL_MINUTES: "5", + OFFSET_MINUTES: "5", + LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123", + LOGIT_OPENSEARCH_HOST: logit_opensearch_host, + PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local", + ) do + described_class.new.call + end + + expect(pushgateway).to have_received(:add) + .with(Prometheus::Client.registry) + end + + it "exits early if there were no Whitehall logs data" do + stub_whitehall_logs_api(body: { hits: { hits: [] } }) + + allow(PublishingLatencySli::ContentStoreEvents).to receive(:new) + .and_call_original + + ClimateControl.modify( + INTERVAL_MINUTES: "5", + OFFSET_MINUTES: "5", + LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123", + LOGIT_OPENSEARCH_HOST: logit_opensearch_host, + PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local", + ) do + described_class.new.call + end + + expect(PublishingLatencySli::ContentStoreEvents).not_to have_received(:new) + end + + it "exits early if there were no Content Store logs data" do + stub_whitehall_logs_api + stub_content_store_logs_api(body: { hits: { hits: [] } }) + + allow(PublishingLatencySli::RecordMetrics).to receive(:new) + .and_call_original + + ClimateControl.modify( + INTERVAL_MINUTES: "5", + OFFSET_MINUTES: "5", + LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123", + LOGIT_OPENSEARCH_HOST: logit_opensearch_host, + PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local", + ) do described_class.new.call - }.not_to raise_error + end + + expect(PublishingLatencySli::RecordMetrics).not_to have_received(:new) + end + + it "gets Whitehall logs from within a given time interval, upto an offset" do + whitehall_events = instance_spy(PublishingLatencySli::WhitehallEvents) + allow(whitehall_events).to receive(:call).and_return([]) + allow(PublishingLatencySli::WhitehallEvents).to receive(:new) + .and_return(whitehall_events) + + time_now = Time.new(2023, 11, 16, 12, 15, 30) + ten_minutes_ago = Time.new(2023, 11, 16, 12, 5, 30) + thirty_minutes_ago = Time.new(2023, 11, 16, 11, 45, 30) + + ClimateControl.modify( + INTERVAL_MINUTES: "20", + OFFSET_MINUTES: "10", + LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123", + LOGIT_OPENSEARCH_HOST: logit_opensearch_host, + PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local", + ) do + Timecop.freeze(time_now) do + described_class.new.call + end + end + + expect(whitehall_events).to have_received(:call).with( + from_time: thirty_minutes_ago, + to_time: ten_minutes_ago, + ) + end + + it "gets Content Store logs from the beginning of the time interval" do + stub_whitehall_logs_api + + content_store_events = instance_spy( + PublishingLatencySli::ContentStoreEvents, + ) + allow(content_store_events).to receive(:call).and_return([]) + allow(PublishingLatencySli::ContentStoreEvents).to receive(:new) + .and_return(content_store_events) + + time_now = Time.new(2023, 11, 16, 12, 15, 30) + thirty_minutes_ago = Time.new(2023, 11, 16, 11, 45, 30) + + ClimateControl.modify( + INTERVAL_MINUTES: "20", + OFFSET_MINUTES: "10", + LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123", + LOGIT_OPENSEARCH_HOST: logit_opensearch_host, + PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local", + ) do + Timecop.freeze(time_now) do + described_class.new.call + end + end + + expect(content_store_events).to have_received(:call).with( + matching: anything, + from_time: thirty_minutes_ago, + ) + end + + def whitehall_fixture + fixture_path = "spec/fixtures/logit-opensearch-whitehall-events.json" + JSON.parse(File.read(fixture_path)) + end + + def stub_whitehall_logs_api(body: whitehall_fixture) + stub_request(:get, "#{logit_opensearch_host}/_search") + .with(body: /whitehall-admin/) + .to_return_json(status: 200, body:) + end + + def content_store_fixture + fixture_path = "spec/fixtures/logit-opensearch-content-store-events.json" + JSON.parse(File.read(fixture_path)) + end + + def stub_content_store_logs_api(body: content_store_fixture) + stub_request(:get, "#{logit_opensearch_host}/_search") + .with(body: /content-store/) + .to_return_json(status: 200, body:) end end end