Skip to content

Commit

Permalink
Build out the publishing latency SLI
Browse files Browse the repository at this point in the history
* Fetches Whitehall events from Logit
* Fetches Content Store events from Logit
* Turns those events into metrics
* Sends those metrics to Prometheus's PushGateway

In case it's not obvious what's happening with the `OFFSET_MINUTES` and
`INTERVAL_MINUTES` here, I've explained the motivation for `from_time`
and `to_time` in the commits that introduced `WhitehallEvents` and
`ContentStoreEvents`.
  • Loading branch information
mike29736 committed Nov 28, 2023
1 parent 8b1532b commit 304170e
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 5 deletions.
61 changes: 60 additions & 1 deletion lib/govuk_sli_collector/publishing_latency_sli.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,64 @@
require "prometheus/client"
require "prometheus/client/push"

require "govuk_sli_collector/publishing_latency_sli/content_store_events"
require "govuk_sli_collector/publishing_latency_sli/logit_search"
require "govuk_sli_collector/publishing_latency_sli/record_metrics"
require "govuk_sli_collector/publishing_latency_sli/whitehall_events"

module GovukSliCollector
  # Computes the publishing latency SLI: fetches Whitehall publishing
  # events and matching Content Store events from Logit, derives latency
  # metrics from them, and pushes those metrics to Prometheus's
  # PushGateway.
  class PublishingLatencySli
    # Reads all configuration from the environment.
    #
    # Raises KeyError if any required environment variable is missing:
    # LOGIT_OPENSEARCH_HOST, LOGIT_OPENSEARCH_BASIC_AUTH,
    # PROMETHEUS_PUSHGATEWAY_URL, OFFSET_MINUTES, INTERVAL_MINUTES.
    # Raises ArgumentError if OFFSET_MINUTES or INTERVAL_MINUTES is not
    # an integer string (Integer() is strict, unlike String#to_i).
    def initialize
      @logit_search = LogitSearch.new(
        host: ENV.fetch("LOGIT_OPENSEARCH_HOST"),
        basic_auth: ENV.fetch("LOGIT_OPENSEARCH_BASIC_AUTH"),
      )
      @prometheus_pushgateway_url = ENV.fetch("PROMETHEUS_PUSHGATEWAY_URL")

      # Events are queried within [from_time, to_time]: a window ending
      # OFFSET_MINUTES ago and spanning INTERVAL_MINUTES.
      @to_time = minutes_ago(Integer(ENV.fetch("OFFSET_MINUTES")))
      @from_time = @to_time - minutes(Integer(ENV.fetch("INTERVAL_MINUTES")))
    end

    # Runs the collection pipeline. Returns early — pushing nothing —
    # when either source has no events for the window.
    def call
      whitehall_events = WhitehallEvents.new(logit_search:).call(
        from_time:,
        to_time:,
      )

      return if whitehall_events.empty?

      content_store_events = ContentStoreEvents.new(logit_search:).call(
        from_time:,
        matching: whitehall_events,
      )

      return if content_store_events.empty?

      prometheus_registry = Prometheus::Client.registry

      RecordMetrics.new(prometheus_registry:).call(
        whitehall_events:,
        content_store_events:,
      )

      Prometheus::Client::Push.new(
        job: "govuk_sli_collector_publishing_latency_sli",
        gateway: prometheus_pushgateway_url,
      ).add(prometheus_registry)
    end

    private

    attr_reader :logit_search,
                :prometheus_pushgateway_url,
                :from_time,
                :to_time

    # Converts a number of minutes to seconds.
    def minutes(number_of)
      number_of * 60
    end

    # Returns the UTC Time that many minutes before now.
    def minutes_ago(number_of)
      Time.now.utc - minutes(number_of)
    end
  end
end
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
require "prometheus/client"

module GovukSliCollector
class PublishingLatencySli
class RecordMetrics
Expand Down
155 changes: 153 additions & 2 deletions spec/govuk_sli_collector/publishing_latency_sli_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,161 @@

module GovukSliCollector
  RSpec.describe PublishingLatencySli do
    let(:logit_opensearch_host) { "https://example.logit.io" }
    let(:pushgateway) { instance_spy(Prometheus::Client::Push) }

    before do
      allow(Prometheus::Client::Push).to receive(:new).and_return(pushgateway)
    end

    after do
      # Unregister every metric so each example starts with a clean
      # global registry (Prometheus::Client.registry is process-wide).
      Prometheus::Client.registry.metrics.each do |metric|
        Prometheus::Client.registry.unregister(metric.name)
      end
    end

    it "requires some environment variables to run" do
      expect {
        described_class.new
      }.to raise_error(KeyError)
    end

    it "derives metrics from log data and pushes them to Prometheus" do
      stub_whitehall_logs_api
      stub_content_store_logs_api

      ClimateControl.modify(
        INTERVAL_MINUTES: "5",
        OFFSET_MINUTES: "5",
        LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123",
        LOGIT_OPENSEARCH_HOST: logit_opensearch_host,
        PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local",
      ) do
        described_class.new.call
      end

      expect(pushgateway).to have_received(:add)
        .with(Prometheus::Client.registry)
    end

    it "exits early if there were no Whitehall logs data" do
      stub_whitehall_logs_api(body: { hits: { hits: [] } })

      allow(PublishingLatencySli::ContentStoreEvents).to receive(:new)
        .and_call_original

      ClimateControl.modify(
        INTERVAL_MINUTES: "5",
        OFFSET_MINUTES: "5",
        LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123",
        LOGIT_OPENSEARCH_HOST: logit_opensearch_host,
        PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local",
      ) do
        described_class.new.call
      end

      expect(PublishingLatencySli::ContentStoreEvents).not_to have_received(:new)
    end

    it "exits early if there were no Content Store logs data" do
      stub_whitehall_logs_api
      stub_content_store_logs_api(body: { hits: { hits: [] } })

      allow(PublishingLatencySli::RecordMetrics).to receive(:new)
        .and_call_original

      ClimateControl.modify(
        INTERVAL_MINUTES: "5",
        OFFSET_MINUTES: "5",
        LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123",
        LOGIT_OPENSEARCH_HOST: logit_opensearch_host,
        PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local",
      ) do
        described_class.new.call
      end

      expect(PublishingLatencySli::RecordMetrics).not_to have_received(:new)
    end

    it "gets Whitehall logs from within a given time interval, upto an offset" do
      whitehall_events = instance_spy(PublishingLatencySli::WhitehallEvents)
      allow(whitehall_events).to receive(:call).and_return([])
      allow(PublishingLatencySli::WhitehallEvents).to receive(:new)
        .and_return(whitehall_events)

      time_now = Time.new(2023, 11, 16, 12, 15, 30)
      ten_minutes_ago = Time.new(2023, 11, 16, 12, 5, 30)
      thirty_minutes_ago = Time.new(2023, 11, 16, 11, 45, 30)

      ClimateControl.modify(
        INTERVAL_MINUTES: "20",
        OFFSET_MINUTES: "10",
        LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123",
        LOGIT_OPENSEARCH_HOST: logit_opensearch_host,
        PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local",
      ) do
        Timecop.freeze(time_now) do
          described_class.new.call
        end
      end

      expect(whitehall_events).to have_received(:call).with(
        from_time: thirty_minutes_ago,
        to_time: ten_minutes_ago,
      )
    end

    it "gets Content Store logs from the beginning of the time interval" do
      stub_whitehall_logs_api

      content_store_events = instance_spy(
        PublishingLatencySli::ContentStoreEvents,
      )
      allow(content_store_events).to receive(:call).and_return([])
      allow(PublishingLatencySli::ContentStoreEvents).to receive(:new)
        .and_return(content_store_events)

      time_now = Time.new(2023, 11, 16, 12, 15, 30)
      thirty_minutes_ago = Time.new(2023, 11, 16, 11, 45, 30)

      ClimateControl.modify(
        INTERVAL_MINUTES: "20",
        OFFSET_MINUTES: "10",
        LOGIT_OPENSEARCH_BASIC_AUTH: "ABC123",
        LOGIT_OPENSEARCH_HOST: logit_opensearch_host,
        PROMETHEUS_PUSHGATEWAY_URL: "http://prometheus-pushgateway.local",
      ) do
        Timecop.freeze(time_now) do
          described_class.new.call
        end
      end

      expect(content_store_events).to have_received(:call).with(
        matching: anything,
        from_time: thirty_minutes_ago,
      )
    end

    def whitehall_fixture
      fixture_path = "spec/fixtures/logit-opensearch-whitehall-events.json"
      JSON.parse(File.read(fixture_path))
    end

    def stub_whitehall_logs_api(body: whitehall_fixture)
      stub_request(:get, "#{logit_opensearch_host}/_search")
        .with(body: /whitehall-admin/)
        .to_return_json(status: 200, body:)
    end

    def content_store_fixture
      fixture_path = "spec/fixtures/logit-opensearch-content-store-events.json"
      JSON.parse(File.read(fixture_path))
    end

    def stub_content_store_logs_api(body: content_store_fixture)
      stub_request(:get, "#{logit_opensearch_host}/_search")
        .with(body: /content-store/)
        .to_return_json(status: 200, body:)
    end
  end
end

0 comments on commit 304170e

Please sign in to comment.