From ff24dfae08b09cb81a1e65f03b1babcfed1039f6 Mon Sep 17 00:00:00 2001 From: Aisha M Date: Mon, 30 Dec 2024 15:40:50 -0500 Subject: [PATCH] feat: configurable commit cache size in git plugin Signed-off-by: Aisha M --- Cargo.lock | 22 ++++++++++++ plugins/activity/local-plugin.kdl | 2 +- plugins/activity/plugin.kdl | 2 +- plugins/affiliation/local-plugin.kdl | 2 +- plugins/affiliation/plugin.kdl | 2 +- plugins/churn/local-plugin.kdl | 2 +- plugins/churn/plugin.kdl | 2 +- plugins/entropy/local-plugin.kdl | 2 +- plugins/entropy/plugin.kdl | 2 +- plugins/git/Cargo.toml | 2 ++ plugins/git/src/git.rs | 26 +++++++------- plugins/git/src/main.rs | 36 +++++++++++++++++--- plugins/identity/local-plugin.kdl | 2 +- plugins/identity/plugin.kdl | 2 +- site/content/docs/guide/plugins/mitre-git.md | 6 ++++ 15 files changed, 86 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba1a23da..98b417a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1063,6 +1063,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1215,10 +1221,12 @@ dependencies = [ "hipcheck-sdk", "jiff", "log", + "lru", "once_cell", "schemars", "semver", "serde", + "serde_json", "tokio", "which", ] @@ -2061,6 +2069,11 @@ name = "hashbrown" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2876,6 +2889,15 @@ dependencies = [ "logos-codegen", ] +[[package]] +name = "lru" +version = "0.12.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.0", +] + [[package]] name = "lzma-rs" version = "0.3.0" diff --git a/plugins/activity/local-plugin.kdl b/plugins/activity/local-plugin.kdl index a17de11b..4863b74f 100644 --- a/plugins/activity/local-plugin.kdl +++ b/plugins/activity/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/activity/plugin.kdl b/plugins/activity/plugin.kdl index de4114ff..43dcf770 100644 --- a/plugins/activity/plugin.kdl +++ b/plugins/activity/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/affiliation/local-plugin.kdl b/plugins/affiliation/local-plugin.kdl index dec5449a..12045ab3 100644 --- a/plugins/affiliation/local-plugin.kdl +++ b/plugins/affiliation/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/affiliation/plugin.kdl b/plugins/affiliation/plugin.kdl index ae64e0ca..9f77cbe6 100644 --- a/plugins/affiliation/plugin.kdl +++ b/plugins/affiliation/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/churn/local-plugin.kdl b/plugins/churn/local-plugin.kdl index 8deedd1e..ed3a4344 
100644 --- a/plugins/churn/local-plugin.kdl +++ b/plugins/churn/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/churn/plugin.kdl b/plugins/churn/plugin.kdl index ada4d438..ff0f5ec5 100644 --- a/plugins/churn/plugin.kdl +++ b/plugins/churn/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/entropy/local-plugin.kdl b/plugins/entropy/local-plugin.kdl index 98e57139..8c83eea0 100644 --- a/plugins/entropy/local-plugin.kdl +++ b/plugins/entropy/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/entropy/plugin.kdl b/plugins/entropy/plugin.kdl index 86244286..62b6602f 100644 --- a/plugins/entropy/plugin.kdl +++ b/plugins/entropy/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/git/Cargo.toml b/plugins/git/Cargo.toml index d02195db..f643a663 100644 --- a/plugins/git/Cargo.toml +++ b/plugins/git/Cargo.toml @@ -12,6 +12,7 @@ clap = { version = "4.5.23", features = ["derive"] } hipcheck-sdk = { version = "0.3.0", path = "../../sdk/rust", features = [ "macros", ] } +lru = "0.12.5" gix = { version = "0.69.1", default-features = false, features = [ "basic", "max-control", @@ -22,6 +23,7 @@ log = "0.4.22" once_cell = "1.10.0" schemars = { 
version = "0.8.21", features = ["url"] } semver = "1.0.24" +serde_json = "1.0.134" serde = { version = "1.0.215", features = ["derive", "rc"] } tokio = { version = "1.42.0", features = ["rt"] } which = { version = "7.0.1", default-features = false } diff --git a/plugins/git/src/git.rs b/plugins/git/src/git.rs index d6e0a747..074dd47c 100644 --- a/plugins/git/src/git.rs +++ b/plugins/git/src/git.rs @@ -2,6 +2,8 @@ use crate::data::*; +use crate::Error; +use crate::CACHE; use anyhow::Context; use anyhow::Result; use gix::bstr::ByteSlice; @@ -18,13 +20,12 @@ use gix::traverse::commit::simple::CommitTimeOrder; use gix::ObjectId; use gix::Repository; use jiff::Timestamp; +use lru::LruCache; use std::path::Path; use std::path::PathBuf; -use std::sync::Mutex; /// used to cache all of the `RawCommit` from the last repo/HEAD combination analyzed by this -/// plugin -type GitRawCommitCache = Option<(PathBuf, ObjectId, Vec)>; +pub type GitRawCommitCache = LruCache<(PathBuf, ObjectId), Vec>; /// retrieve a handle to the git repo at this path, as well as determine the commit hash of /// HEAD @@ -122,24 +123,25 @@ pub fn get_all_raw_commits

(repo_path: P) -> Result> where P: AsRef, { - // used to cache all of the RawCommits from the last repository analyzed - static ALL_RAW_COMMITS: Mutex = Mutex::new(None); - let (repo, head_commit) = initialize_repo(repo_path.as_ref())?; - let mut cache = ALL_RAW_COMMITS.lock().unwrap(); + let unlocked_cache = CACHE.get().ok_or(Error::UnspecifiedQueryState)?; + let mut cache = unlocked_cache + .lock() + .map_err(|_| Error::UnspecifiedQueryState)?; // if there is a value in cache, and it is the same repo with the same HEAD commit, then we can use the // cached value - if let Some(cached_value) = cache.as_ref() { - if cached_value.0 == repo_path.as_ref().to_path_buf() && cached_value.1 == head_commit { - return Ok(cached_value.2.clone()); - } + if let Some(cached_value) = cache.get_mut(&(repo_path.as_ref().to_path_buf(), head_commit)) { + return Ok(cached_value.clone()); } // otherwise the cache needs to be updated with the data from this repo_path/HEAD combination let updated_value = get_all_raw_commits_inner(&repo, repo_path.as_ref(), head_commit)?; let raw_commits = updated_value.2.clone(); - *cache = Some(get_all_raw_commits_inner(&repo, repo_path, head_commit)?); + cache.put( + (repo_path.as_ref().to_path_buf(), head_commit), + raw_commits.clone(), + ); Ok(raw_commits) } diff --git a/plugins/git/src/main.rs b/plugins/git/src/main.rs index 24c8cb45..25447519 100644 --- a/plugins/git/src/main.rs +++ b/plugins/git/src/main.rs @@ -12,14 +12,21 @@ use crate::{ }, git::{ get_all_raw_commits, get_commit_diffs, get_commits_from_date, get_contributors, get_diffs, - get_latest_commit, + get_latest_commit, GitRawCommitCache, }, }; use clap::Parser; use hipcheck_sdk::{prelude::*, types::LocalGitRepo}; use jiff::Timestamp; +use lru::LruCache; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::{ + num::NonZero, + sync::{Mutex, OnceLock}, +}; + +pub static CACHE: OnceLock> = OnceLock::new(); /// A locally stored git repo, with a list of additional details 
/// The details will vary based on the query (e.g. a date, a committer e-mail address, a commit hash) @@ -428,7 +435,17 @@ async fn commit_contributors( Ok(commit_contributors) } -#[derive(Clone, Debug)] +#[derive(Deserialize)] +struct Config { + #[serde(default = "default_commit_cache_size")] + commit_cache_size: usize, +} + +fn default_commit_cache_size() -> usize { + 1 +} + +#[derive(Clone, Debug, Default)] struct GitPlugin; impl Plugin for GitPlugin { @@ -436,8 +453,19 @@ impl Plugin for GitPlugin { const NAME: &'static str = "git"; - fn set_config(&self, _config: Value) -> std::result::Result<(), ConfigError> { - Ok(()) + fn set_config(&self, config: Value) -> std::result::Result<(), ConfigError> { + // Deserialize and validate the config struct + let conf: Config = + serde_json::from_value::(config).map_err(|e| ConfigError::Unspecified { + message: e.to_string(), + })?; + let cache_size = conf.commit_cache_size; + + CACHE + .set(Mutex::new(LruCache::new(NonZero::new(cache_size).unwrap()))) + .map_err(|_e| ConfigError::Unspecified { + message: "config was already set".to_owned(), + }) } fn default_policy_expr(&self) -> hipcheck_sdk::prelude::Result { diff --git a/plugins/identity/local-plugin.kdl b/plugins/identity/local-plugin.kdl index 6854e9ac..693d34ed 100644 --- a/plugins/identity/local-plugin.kdl +++ b/plugins/identity/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/identity/plugin.kdl b/plugins/identity/plugin.kdl index 47499b5f..0c229b3d 100644 --- a/plugins/identity/plugin.kdl +++ b/plugins/identity/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } 
diff --git a/site/content/docs/guide/plugins/mitre-git.md b/site/content/docs/guide/plugins/mitre-git.md index 4e4ff32e..a9001f79 100644 --- a/site/content/docs/guide/plugins/mitre-git.md +++ b/site/content/docs/guide/plugins/mitre-git.md @@ -8,3 +8,9 @@ extra: Provides access to Git commit history data. Does not define a default query and can't be used as a top-level plugin in a policy file. + +## Configuration + +| Parameter | Type | Explanation | +|:--------------------|:--------|:--------------| +| `commit-cache-size` | `Integer` | Optional. Maximum number of repository/HEAD-commit combinations whose raw commit data is kept in the cache (least recently used entries are evicted first). Must be greater than zero. Defaults to 1. |