diff --git a/lib/MirrorCache/Task/StatAggPkg.pm b/lib/MirrorCache/Task/StatAggPkg.pm
new file mode 100644
index 00000000..2d3e0354
--- /dev/null
+++ b/lib/MirrorCache/Task/StatAggPkg.pm
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 SUSE LLC
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, see <http://www.gnu.org/licenses/>.
+
+package MirrorCache::Task::StatAggPkg;
+use Mojo::Base 'Mojolicious::Plugin';
+use MirrorCache::Utils 'datetime_now';
+
+# Plugin hook: registers the 'stat_agg_pkg' Minion task, which calls _run
+# with the application object followed by the task's own arguments.
+sub register {
+    my ($self, $app) = @_;
+    $app->minion->add_task(stat_agg_pkg => sub { _run($app, @_) });
+}
+
+# Task body: aggregates package downloads for the 'hour' and 'day' periods.
+# Finishes the job early when the 'stat_agg_pkg' guard cannot be obtained.
+sub _run {
+    my ($app, $job) = @_;
+
+    my $minion = $app->minion;
+
+    # prevent multiple stat_agg_pkg jobs from running in parallel
+    return $job->finish('Previous stat agg pkg job is still active')
+      unless my $guard = $minion->guard('stat_agg_pkg', 86400);
+
+    # each period has its own lock (300 s and 900 s); a period is aggregated
+    # only when its lock can be taken
+    if ($minion->lock('stat_agg_pkg_hour', 5*60)) {
+        _agg($app, $job, 'hour');
+    }
+
+    if ($minion->lock('stat_agg_pkg_day', 15*60)) {
+        _agg($app, $job, 'day');
+    }
+}
+
+# Path of an rpm/drpm file; capture group 2 is the part that is compared with
+# metapkg.name (the file name without the -version-release.arch.rpm suffix).
+my $pkg_re = '^(.*\/)+(.*)-[^-]+-[^-]+\.(x86_64|noarch|ppc64le|(a|loong)arch64.*|s390x|i[3-6]86|armv.*|src|riscv64|ppc.*|nosrc|ia64)(\.d?rpm)$';
+
+# Insert download counts per (package, folder, country) into agg_download_pkg
+# for the $period that ends at the start of the current $period.
+# $period is 'hour' or 'day' ('minute' only affects the non-Pg date format).
+# SQL errors do not fail the job: they are stored in its notes as
+# last_warning_$period together with last_warning_${period}_at.
+sub _agg {
+    my ($app, $job, $period) = @_;
+
+    my $dbh = $app->schema->storage->dbh;
+
+    # The left join + "period is NULL" filter is an anti-join on the whole
+    # primary key: keys that are already aggregated are skipped, so running
+    # the task again for the same period does not hit a duplicate key error.
+    my $sql = "
+insert into agg_download_pkg select '$period'::stat_period_t, dt_to, metapkg.id, coalesce(stat.folder_id, 0), stat.country, count(*)
+from
+( select date_trunc('$period', CURRENT_TIMESTAMP(3)) - interval '1 $period' as dt_from, date_trunc('$period', CURRENT_TIMESTAMP(3)) as dt_to ) x
+join stat on dt between x.dt_from and x.dt_to and path like '%rpm'
+join metapkg on name = regexp_replace(path, '$pkg_re', '\\2')
+left join agg_download_pkg on period = '$period'::stat_period_t and agg_download_pkg.dt = x.dt_to and agg_download_pkg.metapkg_id = metapkg.id and agg_download_pkg.folder_id = coalesce(stat.folder_id, 0) and agg_download_pkg.country = stat.country
+where
+agg_download_pkg.period is NULL
+group by dt_to, metapkg.id, stat.folder_id, stat.country
+";
+
+    if ($dbh->{Driver}->{Name} ne 'Pg') {
+        my $format = '%Y-%m-%d-%H:00';
+        $format = '%Y-%m-%d-00:00' if $period eq 'day';
+        $format = '%Y-%m-%d-%H:%i' if $period eq 'minute';
+
+        # MariaDB/MySQL strip one level of backslashes while parsing a string
+        # literal, so double them to keep \. and \/ intact for the regex engine
+        # (the replacement '\\\\2' below is doubled for the same reason).
+        (my $pkg_re_sql = $pkg_re) =~ s/\\/\\\\/g;
+
+        $sql = "
+insert into agg_download_pkg select '$period', dt_to, metapkg.id, coalesce(stat.folder_id, 0), stat.country, count(*)
+from
+( select date_sub(CONVERT(DATE_FORMAT(now(),'$format'),DATETIME), interval 1 $period) as dt_from, CONVERT(DATE_FORMAT(now(),'$format'),DATETIME) as dt_to ) x
+join stat on dt between x.dt_from and x.dt_to and path like '%rpm'
+join metapkg on name = regexp_replace(path, '$pkg_re_sql', '\\\\2')
+left join agg_download_pkg on period = '$period' and agg_download_pkg.dt = x.dt_to and agg_download_pkg.metapkg_id = metapkg.id and agg_download_pkg.folder_id = coalesce(stat.folder_id, 0) and agg_download_pkg.country = stat.country
+where
+agg_download_pkg.period is NULL
+group by dt_to, metapkg.id, stat.folder_id, stat.country
+";
+    }
+
+    eval {
+        $dbh->prepare($sql)->execute;
+        1;
+    } or $job->note("last_warning_$period" => $@, "last_warning_$period" . "_at" => datetime_now());
+}
+
+1;
diff --git a/lib/MirrorCache/Task/StatAggSchedule.pm b/lib/MirrorCache/Task/StatAggSchedule.pm
index c4c43fb8..092453c4 100644
--- a/lib/MirrorCache/Task/StatAggSchedule.pm
+++ b/lib/MirrorCache/Task/StatAggSchedule.pm
@@ -33,6 +33,7 @@ sub _run {
     return $job->finish('Previous stat agg sync schedule job is still active')
       unless my $guard = $minion->guard('stat_agg_schedule', 86400);
 
+    $minion->enqueue('stat_agg_pkg');
 
     if ($minion->lock('stat_agg_schedule_minute', 10)) {
         _agg($app, $job, 'minute');
diff --git a/lib/MirrorCache/WebAPI/Plugin/Backstage.pm b/lib/MirrorCache/WebAPI/Plugin/Backstage.pm
index 17cca69e..1ba467ca 100644
--- a/lib/MirrorCache/WebAPI/Plugin/Backstage.pm
+++ b/lib/MirrorCache/WebAPI/Plugin/Backstage.pm
@@ -71,6 +71,7 @@ sub register_tasks {
         qw(MirrorCache::Task::ReportProjectSize),
         qw(MirrorCache::Task::ReportProjectSizeSchedule),
         qw(MirrorCache::Task::StatAggSchedule),
+        qw(MirrorCache::Task::StatAggPkg),
     );
 }
 
diff --git a/lib/MirrorCache/resources/migrations/Pg.sql b/lib/MirrorCache/resources/migrations/Pg.sql
index 2ec2d297..27785e1b 100644
--- a/lib/MirrorCache/resources/migrations/Pg.sql
+++ b/lib/MirrorCache/resources/migrations/Pg.sql
@@ -446,3 +446,13 @@ update popular_os set mask = '.*[lL]eap(/|_)(([1-9][0-9])(.|_)([0-9])?(-test|-Cu
 insert into popular_os(id,name,mask) select 10, 'slowroll', '.*/[Ss]lowroll/.*' on conflict do nothing;
 -- 41 up
 alter table stat_agg add primary key (period, dt, mirror_id);
+-- 42 up
+create table if not exists agg_download_pkg (
+    period stat_period_t NOT NULL,
+    dt timestamp NOT NULL,
+    metapkg_id bigint NOT NULL,
+    folder_id bigint NOT NULL,
+    country varchar(2),
+    cnt bigint,
+    primary key(period, dt, metapkg_id, folder_id, country)
+);
diff --git a/lib/MirrorCache/resources/migrations/mysql.sql b/lib/MirrorCache/resources/migrations/mysql.sql
index 4db077eb..6ee5cf8b 100644
--- a/lib/MirrorCache/resources/migrations/mysql.sql
+++ b/lib/MirrorCache/resources/migrations/mysql.sql
@@ -459,3 +459,13 @@ update popular_os set mask = '.*[lL]eap(/|_)(([1-9][0-9])(.|_)([0-9])?(-test|-Cu
 insert into popular_os(id,name,mask) select 10, 'slowroll', '.*/[Ss]lowroll/.*' on duplicate key update id=id;
 -- 41 up
 alter table stat_agg add primary key if not exists (period, dt, mirror_id);
+-- 42 up
+create table if not exists agg_download_pkg (
+    period enum('minute', 'hour', 'day', 'month', 'year', 'total', 'uptime') NOT NULL,
+    dt timestamp NOT NULL,
+    metapkg_id bigint NOT NULL,
+    folder_id bigint NOT NULL,
+    country varchar(2),
+    cnt bigint,
+    primary key(period, dt, metapkg_id, folder_id, country)
+);
diff --git a/t/environ/20-report-download.sh b/t/environ/20-report-download.sh
index 6ff61757..73221d59 100755
--- a/t/environ/20-report-download.sh
+++ b/t/environ/20-report-download.sh
@@ -123,5 +123,7 @@ $mc/sql "insert into stat_agg select dt - interval '1 hour', period, mirror_id,
 
 $mc/curl /rest/efficiency
 $mc/curl /rest/efficiency?period=day
+# $mc/sql 'select * from agg_download_pkg'
+$mc/sql_test 4 == "select count(*) from agg_download_pkg join metapkg on metapkg_id = id where period = 'day' and name = 'cargo1.64' group by period, dt"
 
 echo success