Skip to content

Commit

Permalink
Add job to aggregate package download count
Browse files Browse the repository at this point in the history
  • Loading branch information
andrii-suse committed Jan 8, 2025
1 parent 9ffaee1 commit 77f4894
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 0 deletions.
88 changes: 88 additions & 0 deletions lib/MirrorCache/Task/StatAggPkg.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 SUSE LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, see <http://www.gnu.org/licenses/>.

package MirrorCache::Task::StatAggPkg;
use Mojo::Base 'Mojolicious::Plugin';
use MirrorCache::Utils 'datetime_now';

# Plugin entry point: makes the aggregation routine available to Minion
# under the task name 'stat_agg_pkg'.
sub register {
    my ($plugin, $app) = @_;

    # The callback prepends the application object to whatever arguments
    # Minion passes in, because _run expects ($app, $job).
    my $task = sub { _run($app, @_) };

    $app->minion->add_task(stat_agg_pkg => $task);
}

# Task body for 'stat_agg_pkg'.
#
# Takes the 'stat_agg_pkg' guard (held for up to 86400 seconds, or until this
# sub returns) so that only one instance works at a time, then runs the hourly
# and daily aggregation, each throttled by its own Minion lock.
sub _run {
    my ($app, $job) = @_;
    my $minion = $app->minion;

    # Keep the guard in a lexical for the rest of the sub.
    my $guard = $minion->guard('stat_agg_pkg', 86400);
    return $job->finish('Previous stat agg pkg job is still active') unless $guard;

    # Each lock expires by itself, which limits how often a period is retried:
    # 5 minutes for 'hour', 15 minutes for 'day'.
    _agg($app, $job, 'hour') if $minion->lock('stat_agg_pkg_hour', 5 * 60);
    _agg($app, $job, 'day')  if $minion->lock('stat_agg_pkg_day', 15 * 60);
}

# Pattern, embedded as text into the SQL built by _agg, that takes an rpm path apart:
#   group 1 - directory prefix ending in '/'
#   group 2 - package name (this is what _agg extracts with a \2 backreference)
#   then two dash-separated segments without dashes (presumably version and
#   release - TODO confirm), a dot plus one of the listed architectures, and a
#   .rpm or .drpm suffix.
# Disabled variant with doubled backslashes, kept for reference:
# my $pkg_re = '^(.*\\/)+(.*)-[^-]+-[^-]+\\.(x86_64|noarch|ppc64le|(a|loong)arch64.*|s390x|i[3-6]86|armv.*|src|riscv64|ppc.*|nosrc|ia64)(\\.d?rpm)$';
my $pkg_re = '^(.*\/)+(.*)-[^-]+-[^-]+\.(x86_64|noarch|ppc64le|(a|loong)arch64.*|s390x|i[3-6]86|armv.*|src|riscv64|ppc.*|nosrc|ia64)(\.d?rpm)$';

# Aggregate package download counts for the most recently completed $period.
#
# Counts rows of `stat` whose path ends in 'rpm', maps each path to a `metapkg`
# row through the package name extracted with $pkg_re, and inserts one row per
# (package, folder, country) into `agg_download_pkg`, stamped with the end of
# the aggregated window (dt_to).
#
# Parameters:
#   $app    - application object; only $app->schema->storage->dbh is used here
#   $job    - job object; a failing statement is recorded with $job->note
#   $period - period name. It is interpolated into the SQL text, so it must
#             come from trusted code (this file passes 'hour' and 'day').
#
# Returns 1 on success, otherwise the result of $job->note. Exceptions raised
# by the statement are stored as job notes and are not rethrown.
sub _agg {
    my ($app, $job, $period) = @_;

    my $dbh = $app->schema->storage->dbh;

    # Only these fragments differ between the two supported SQL dialects.
    my ($period_sql, $window_sql, $regex_sql, $backref_sql);
    if ($dbh->{Driver}->{Name} eq 'Pg') {
        $period_sql = "'$period'::stat_period_t";
        $window_sql = "select date_trunc('$period', CURRENT_TIMESTAMP(3)) - interval '1 $period' as dt_from, date_trunc('$period', CURRENT_TIMESTAMP(3)) as dt_to";

        # Sent verbatim, as before.
        # NOTE(review): relies on backslashes being literal inside '...'
        # (standard_conforming_strings) - confirm for the target servers.
        $regex_sql   = $pkg_re;
        $backref_sql = '\\2';
    }
    else {
        # Truncate now() to the start of the current period via its format.
        my $format = '%Y-%m-%d-%H:00';
        $format = '%Y-%m-%d-00:00' if $period eq 'day';
        $format = '%Y-%m-%d-%H:%i' if $period eq 'minute';

        $period_sql = "'$period'";
        $window_sql = "select date_sub(CONVERT(DATE_FORMAT(now(),'$format'),DATETIME), interval 1 $period) as dt_from, CONVERT(DATE_FORMAT(now(),'$format'),DATETIME) as dt_to";

        # This dialect treats backslash as an escape inside string literals,
        # so every backslash has to be doubled to reach the regex engine.
        # Otherwise '\.' degrades to '.', which matches any character. The
        # backreference was already doubled for the same reason.
        ($regex_sql = $pkg_re) =~ s/\\/\\\\/g;
        $backref_sql = '\\\\2';
    }

    # The left join plus "is NULL" is an anti-join: a group is inserted only
    # when no row with the same primary key (period, dt, metapkg_id,
    # folder_id, country) exists yet. That makes a repeated run for the same
    # window a no-op instead of a duplicate-key failure.
    # NOTE(review): a NULL stat.country never compares equal, so such rows
    # are not protected by the anti-join - confirm how they should be handled.
    my $sql = "
insert into agg_download_pkg select $period_sql, dt_to, metapkg.id, coalesce(stat.folder_id, 0), stat.country, count(*)
from
( $window_sql ) x
join stat on dt between x.dt_from and x.dt_to and path like '%rpm'
join metapkg on name = regexp_replace(path, '$regex_sql', '$backref_sql')
left join agg_download_pkg on agg_download_pkg.period = $period_sql and agg_download_pkg.dt = x.dt_to and agg_download_pkg.metapkg_id = metapkg.id and agg_download_pkg.folder_id = coalesce(stat.folder_id, 0) and agg_download_pkg.country = stat.country
where
agg_download_pkg.period is NULL
group by dt_to, metapkg.id, stat.folder_id, stat.country
";

    return 1 if eval { $dbh->prepare($sql)->execute; 1 };

    # Copy $@ right away: it is a global and the calls below may reset it.
    my $error = $@;
    return $job->note(
        "last_warning_$period"          => $error,
        "last_warning_$period" . "_at" => datetime_now(),
    );
}

1;
1 change: 1 addition & 0 deletions lib/MirrorCache/Task/StatAggSchedule.pm
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ sub _run {
return $job->finish('Previous stat agg sync schedule job is still active')
unless my $guard = $minion->guard('stat_agg_schedule', 86400);

$minion->enqueue('stat_agg_pkg');

if ($minion->lock('stat_agg_schedule_minute', 10)) {
_agg($app, $job, 'minute');
Expand Down
1 change: 1 addition & 0 deletions lib/MirrorCache/WebAPI/Plugin/Backstage.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ sub register_tasks {
qw(MirrorCache::Task::ReportProjectSize),
qw(MirrorCache::Task::ReportProjectSizeSchedule),
qw(MirrorCache::Task::StatAggSchedule),
qw(MirrorCache::Task::StatAggPkg),
);
}

Expand Down
10 changes: 10 additions & 0 deletions lib/MirrorCache/resources/migrations/Pg.sql
Original file line number Diff line number Diff line change
Expand Up @@ -446,3 +446,13 @@ update popular_os set mask = '.*[lL]eap(/|_)(([1-9][0-9])(.|_)([0-9])?(-test|-Cu
insert into popular_os(id,name,mask) select 10, 'slowroll', '.*/[Ss]lowroll/.*' on conflict do nothing;
-- 41 up
alter table stat_agg add primary key (period, dt, mirror_id);
-- 42 up
-- Per-package download counters filled by the stat_agg_pkg task.
-- That task inserts without a column list, so the column order below matters.
-- NOTE(review): country is part of the primary key, so rows without a country
-- presumably cannot be stored - confirm.
create table if not exists agg_download_pkg (
period stat_period_t NOT NULL, -- aggregation period
dt timestamp NOT NULL, -- end of the aggregated window
metapkg_id bigint NOT NULL, -- metapkg.id of the package
folder_id bigint NOT NULL, -- folder id of the download, 0 when the stat row has none
country varchar(2), -- country copied from the stat row
cnt bigint, -- number of matching stat rows
primary key(period, dt, metapkg_id, folder_id, country)
);
10 changes: 10 additions & 0 deletions lib/MirrorCache/resources/migrations/mysql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -459,3 +459,13 @@ update popular_os set mask = '.*[lL]eap(/|_)(([1-9][0-9])(.|_)([0-9])?(-test|-Cu
insert into popular_os(id,name,mask) select 10, 'slowroll', '.*/[Ss]lowroll/.*' on duplicate key update id=id;
-- 41 up
alter table stat_agg add primary key if not exists (period, dt, mirror_id);
-- 42 up
-- Per-package download counters filled by the stat_agg_pkg task.
-- That task inserts without a column list, so the column order below matters.
-- NOTE(review): country is part of the primary key, so rows without a country
-- presumably cannot be stored as NULL - confirm.
create table if not exists agg_download_pkg (
period enum('minute', 'hour', 'day', 'month', 'year', 'total', 'uptime') NOT NULL, -- aggregation period
dt timestamp NOT NULL, -- end of the aggregated window
metapkg_id bigint NOT NULL, -- metapkg.id of the package
folder_id bigint NOT NULL, -- folder id of the download, 0 when the stat row has none
country varchar(2), -- country copied from the stat row
cnt bigint, -- number of matching stat rows
primary key(period, dt, metapkg_id, folder_id, country)
);
2 changes: 2 additions & 0 deletions t/environ/20-report-download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -123,5 +123,7 @@ $mc/sql "insert into stat_agg select dt - interval '1 hour', period, mirror_id,
$mc/curl /rest/efficiency
$mc/curl /rest/efficiency?period=day

# $mc/sql 'select * from agg_download_pkg'
$mc/sql_test 4 == "select count(*) from agg_download_pkg join metapkg on metapkg_id = id where period = 'day' and name = 'cargo1.64' group by period, dt"

echo success

0 comments on commit 77f4894

Please sign in to comment.