Skip to content

Commit

Permalink
Merge pull request #3 from alphagov/publishing-latency-sli
Browse files Browse the repository at this point in the history
Publishing latency SLI
  • Loading branch information
mike29736 authored Nov 29, 2023
2 parents 303b02a + 304170e commit 93e57f6
Show file tree
Hide file tree
Showing 23 changed files with 1,720 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,18 @@ jobs:
lint-ruby:
name: Lint Ruby
uses: alphagov/govuk-infrastructure/.github/workflows/rubocop.yml@main

test-ruby:
name: Test Ruby
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:
bundler-cache: true

- name: Run RSpec
run: bundle exec rspec
1 change: 1 addition & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--require spec_helper
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
inherit_gem:
rubocop-govuk:
- config/default.yml
- config/rspec.yml

inherit_mode:
merge:
Expand Down
10 changes: 10 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

source "https://rubygems.org"

gem "prometheus-client"
gem "rest-client"

group :test do
gem "climate_control"
gem "rspec"
gem "timecop"
gem "webmock"
end

group :development do
gem "rubocop-govuk"
end
50 changes: 50 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,64 @@ GEM
minitest (>= 5.1)
mutex_m
tzinfo (~> 2.0)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
base64 (0.1.1)
bigdecimal (3.1.4)
climate_control (1.2.0)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
crack (0.4.5)
rexml
diff-lcs (1.5.0)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
drb (2.1.1)
ruby2_keywords
hashdiff (1.0.1)
http-accept (1.7.0)
http-cookie (1.0.5)
domain_name (~> 0.5)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
json (2.6.3)
language_server-protocol (3.17.0.3)
mime-types (3.5.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.1003)
minitest (5.20.0)
mutex_m (0.1.2)
netrc (0.11.0)
parallel (1.23.0)
parser (3.2.2.4)
ast (~> 2.4.1)
racc
prometheus-client (4.2.2)
public_suffix (5.0.3)
racc (1.7.1)
rack (3.0.8)
rainbow (3.1.1)
regexp_parser (2.8.2)
rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
rexml (3.2.6)
rspec (3.12.0)
rspec-core (~> 3.12.0)
rspec-expectations (~> 3.12.0)
rspec-mocks (~> 3.12.0)
rspec-core (3.12.2)
rspec-support (~> 3.12.0)
rspec-expectations (3.12.3)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-mocks (3.12.6)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-support (3.12.1)
rubocop (1.55.0)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
Expand Down Expand Up @@ -68,15 +104,29 @@ GEM
rubocop-factory_bot (~> 2.22)
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
timecop (0.9.8)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
unicode-display_width (2.5.0)
webmock (3.19.1)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)

PLATFORMS
x86_64-linux

DEPENDENCIES
climate_control
prometheus-client
rest-client
rspec
rubocop-govuk
timecop
webmock

BUNDLED WITH
2.4.10
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,16 @@ Collects data about GOV.UK services and produces SLI metrics for use in SLO dash

This is a Ruby script.

In production, it's run from a scheduled task via its `./collect` executable.

### Running the linter

`bundle exec rubocop`

### Running the test suite

`bundle exec rspec`

## Licence

[MIT License](LICENCE.txt)
10 changes: 10 additions & 0 deletions collect
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env ruby

# Entry point for the SLI collector: sets up the default Bundler group, puts
# the project's lib directory on the load path and runs GovukSliCollector.
#
# Paths are resolved relative to this file rather than the current working
# directory, so the script behaves the same wherever it is started from.

# Only set when not already provided, so an explicit BUNDLE_GEMFILE still wins.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("Gemfile", __dir__)

require "bundler"
Bundler.setup(:default)

$LOAD_PATH << File.expand_path("lib", __dir__)

require "govuk_sli_collector"

GovukSliCollector.call
7 changes: 7 additions & 0 deletions lib/govuk_sli_collector.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require "govuk_sli_collector/publishing_latency_sli"

# Top-level namespace and entry point for the SLI collector.
module GovukSliCollector
  class << self
    # Builds a PublishingLatencySli collector and runs it.
    #
    # @return whatever PublishingLatencySli#call returns
    def call
      collector = GovukSliCollector::PublishingLatencySli.new
      collector.call
    end
  end
end
64 changes: 64 additions & 0 deletions lib/govuk_sli_collector/publishing_latency_sli.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require "prometheus/client"
require "prometheus/client/push"

require "govuk_sli_collector/publishing_latency_sli/content_store_events"
require "govuk_sli_collector/publishing_latency_sli/logit_search"
require "govuk_sli_collector/publishing_latency_sli/record_metrics"
require "govuk_sli_collector/publishing_latency_sli/whitehall_events"

module GovukSliCollector
  # Measures publishing latency for a time window and pushes the resulting
  # metrics to a Prometheus Pushgateway.
  #
  # Configuration is read from the environment when the object is built:
  # LOGIT_OPENSEARCH_HOST, LOGIT_OPENSEARCH_BASIC_AUTH,
  # PROMETHEUS_PUSHGATEWAY_URL, OFFSET_MINUTES and INTERVAL_MINUTES. A missing
  # variable raises KeyError; a non-integer minutes value raises ArgumentError.
  class PublishingLatencySli
    SECONDS_PER_MINUTE = 60

    # The window ends OFFSET_MINUTES before now (UTC) and starts
    # INTERVAL_MINUTES before that.
    def initialize
      opensearch_host = ENV.fetch("LOGIT_OPENSEARCH_HOST")
      opensearch_basic_auth = ENV.fetch("LOGIT_OPENSEARCH_BASIC_AUTH")
      @logit_search = LogitSearch.new(
        host: opensearch_host,
        basic_auth: opensearch_basic_auth,
      )
      @prometheus_pushgateway_url = ENV.fetch("PROMETHEUS_PUSHGATEWAY_URL")

      offset_seconds = Integer(ENV.fetch("OFFSET_MINUTES")) * SECONDS_PER_MINUTE
      @to_time = Time.now.utc - offset_seconds

      interval_seconds = Integer(ENV.fetch("INTERVAL_MINUTES")) * SECONDS_PER_MINUTE
      @from_time = @to_time - interval_seconds
    end

    # Fetches Whitehall events for the window, then the Content Store events
    # matching them, records the latencies and pushes the registry.
    #
    # Returns nil without recording or pushing anything when either lookup
    # comes back empty; otherwise returns the result of the push.
    def call
      whitehall_events = fetch_whitehall_events
      return if whitehall_events.empty?

      content_store_events = fetch_content_store_events(whitehall_events)
      return if content_store_events.empty?

      registry = Prometheus::Client.registry
      RecordMetrics.new(prometheus_registry: registry).call(
        whitehall_events: whitehall_events,
        content_store_events: content_store_events,
      )

      push(registry)
    end

  private

    attr_reader :logit_search,
                :prometheus_pushgateway_url,
                :from_time,
                :to_time

    # Whitehall events logged within [from_time, to_time).
    def fetch_whitehall_events
      finder = WhitehallEvents.new(logit_search: logit_search)
      finder.call(from_time: from_time, to_time: to_time)
    end

    # Content Store events matching the given Whitehall events. Only the
    # start of the window is passed on; no end time is given.
    def fetch_content_store_events(whitehall_events)
      finder = ContentStoreEvents.new(logit_search: logit_search)
      finder.call(from_time: from_time, matching: whitehall_events)
    end

    # Sends the registry's metrics to the configured Pushgateway.
    def push(registry)
      pusher = Prometheus::Client::Push.new(
        job: "govuk_sli_collector_publishing_latency_sli",
        gateway: prometheus_pushgateway_url,
      )
      pusher.add(registry)
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
require "govuk_sli_collector/publishing_latency_sli/log_event"

module GovukSliCollector
  class PublishingLatencySli
    # Finds "content_items#update" log events from the
    # "content-store-mongo-main" app whose govuk_request_id matches one of a
    # given set of events.
    class ContentStoreEvents
      # @param logit_search an object responding to #call with the keyword
      #   arguments app_name:, govuk_request_ids:, route: and from_time:
      def initialize(logit_search:)
        @logit_search = logit_search
      end

      # @param matching events (responding to #govuk_request_id) whose request
      #   IDs should be searched for
      # @param from_time earliest timestamp to search from
      # @return [Array<LogEvent>] one LogEvent per search hit
      def call(matching:, from_time:)
        request_ids = matching.map { |event| event.govuk_request_id }

        hits = logit_search.call(
          app_name: "content-store-mongo-main",
          govuk_request_ids: request_ids,
          route: "content_items#update",
          from_time: from_time,
        )

        hits.map { |fields| build_log_event(fields) }
      end

    private

      attr_reader :logit_search

      # Each field arrives as an array of values; the first one is used. The
      # timestamp is rounded down to whole seconds.
      def build_log_event(fields)
        LogEvent.new(
          govuk_request_id: fields["govuk_request_id"].first,
          time: Time.new(fields["@timestamp"].first).floor,
        )
      end
    end
  end
end
5 changes: 5 additions & 0 deletions lib/govuk_sli_collector/publishing_latency_sli/log_event.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module GovukSliCollector
  class PublishingLatencySli
    # One log event: the request ID it was logged with and the time it was
    # logged.
    LogEvent = Struct.new(*%i[govuk_request_id time])
  end
end
67 changes: 67 additions & 0 deletions lib/govuk_sli_collector/publishing_latency_sli/logit_search.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
require "json"
require "rest-client"

module GovukSliCollector
  class PublishingLatencySli
    # Runs filtered searches for application log events against the
    # `_search` endpoint of the configured host.
    class LogitSearch
      # Raised for any StandardError hit while building, sending or parsing a
      # search; the message is the original error's #inspect.
      class Error < StandardError; end

      # Number of hits requested when the caller does not pass `size:`.
      # Without an explicit size the search API returns only its default page
      # of 10 hits, silently dropping the rest.
      # NOTE(review): 10,000 is presumed to be within the index's
      # `max_result_window` - confirm against the Logit stack.
      DEFAULT_SIZE = 10_000

      # @param host [String] base URL; "_search" is joined onto it
      # @param basic_auth [String] value sent after "Basic " in the
      #   Authorization header (presumably already-encoded credentials -
      #   verify against caller)
      def initialize(host:, basic_auth:)
        @url = URI.join(host, "_search").to_s
        @basic_auth = basic_auth
      end

      # @param app_name [String] matched against kubernetes.deployment.name
      # @param route [String, Array<String>] one route or a list of routes
      # @param from_time [Time] inclusive lower bound on @timestamp
      # @param to_time [Time, nil] exclusive upper bound; omitted when nil
      # @param govuk_request_ids [Array<String>] when non-empty, only events
      #   with one of these request IDs are returned
      # @param size [Integer] maximum number of hits to request
      # @return [Array] the "fields" entry of every hit
      # @raise [Error] on any failure
      def call(app_name:, route:, from_time:, to_time: nil, govuk_request_ids: [], size: DEFAULT_SIZE)
        response = RestClient::Request.execute(
          method: :get,
          url:,
          headers: {
            "Content-Type": "application/json",
            "Authorization": "Basic #{basic_auth}",
          },
          payload: search_body(app_name:, route:, from_time:, to_time:, govuk_request_ids:, size:).to_json,
        )

        hits = JSON.parse(response.body)["hits"]["hits"]
        hits.map { |hit| hit["fields"] }
      rescue StandardError => e
        raise Error, e.inspect
      end

    private

      attr_reader :url, :basic_auth

      # Assembles the full search request body.
      def search_body(app_name:, route:, from_time:, to_time:, govuk_request_ids:, size:)
        {
          query: {
            bool: {
              filter: [
                { term: { "kubernetes.deployment.name": app_name } },
                { term: { "kubernetes.container.name": "app" } },
                route_filter(route),
                timestamp_filter(from_time, to_time),
                request_id_filter(govuk_request_ids),
              ].compact, # drops the request ID filter when it is not needed
            },
          },
          fields: ["duration", "govuk_request_id", "@timestamp"],
          _source: false,
          size:,
        }
      end

      # A list of routes needs a `terms` filter; a single route a `term`.
      def route_filter(route)
        route.is_a?(Array) ? { terms: { route: } } : { term: { route: } }
      end

      # Range on @timestamp: always `gte`, plus `lt` only when to_time is set.
      def timestamp_filter(from_time, to_time)
        bounds = { gte: from_time.iso8601, lt: to_time&.iso8601 }.compact
        { range: { "@timestamp": bounds } }
      end

      # Returns nil for an empty list so no request ID filter is applied.
      def request_id_filter(govuk_request_ids)
        return if govuk_request_ids.empty?

        { terms: { govuk_request_id: govuk_request_ids } }
      end
    end
  end
end
43 changes: 43 additions & 0 deletions lib/govuk_sli_collector/publishing_latency_sli/record_metrics.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
module GovukSliCollector
  class PublishingLatencySli
    # Turns pairs of Whitehall and Content Store events into publishing
    # latency observations on two histograms registered with the given
    # registry.
    class RecordMetrics
      # @param prometheus_registry registry the histograms are registered on
      def initialize(prometheus_registry:)
        @prometheus_registry = prometheus_registry
      end

      # For each Whitehall event with at least one Content Store event sharing
      # its govuk_request_id, observes the time from the Whitehall event to
      # the earliest matching Content Store event ("first content") and to
      # the latest ("all content"). Negative latencies are not observed.
      #
      # @return [Array] an empty array when either input is empty (no
      #   histograms are registered in that case), otherwise whitehall_events
      def call(whitehall_events:, content_store_events:)
        return [] if whitehall_events.empty?
        return [] if content_store_events.empty?

        first_content_histogram = prometheus_registry.histogram(
          :publishing_latency_first_content_s,
          docstring: "Publishing latency for a single content item, in seconds",
        )
        all_content_histogram = prometheus_registry.histogram(
          :publishing_latency_all_content_s,
          docstring: "Publishing latency for all affected content items, in seconds",
        )

        store_events_by_request_id = content_store_events.group_by(&:govuk_request_id)

        whitehall_events.each do |published|
          store_events = store_events_by_request_id[published.govuk_request_id]
          next if store_events.nil? || store_events.empty?

          earliest, latest = store_events.map(&:time).minmax

          observe_unless_negative(first_content_histogram, earliest - published.time)
          observe_unless_negative(all_content_histogram, latest - published.time)
        end
      end

    private

      attr_reader :prometheus_registry

      # Skips the observation when the Content Store event precedes the
      # Whitehall event.
      def observe_unless_negative(histogram, latency)
        return if latency.negative?

        histogram.observe(latency)
      end
    end
  end
end
Loading

0 comments on commit 93e57f6

Please sign in to comment.