diff --git a/Cargo.lock b/Cargo.lock index 92a6ec7c..425005ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1057,6 +1057,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1201,7 +1207,7 @@ checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "git" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "clap", @@ -1209,10 +1215,12 @@ dependencies = [ "hipcheck-sdk", "jiff", "log", + "lru", "once_cell", "schemars", "semver", "serde", + "serde_json", "tokio", "which", ] @@ -1992,6 +2000,11 @@ name = "hashbrown" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2795,6 +2808,15 @@ dependencies = [ "logos-codegen", ] +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.0", +] + [[package]] name = "lzma-rs" version = "0.3.0" diff --git a/plugins/activity/local-plugin.kdl b/plugins/activity/local-plugin.kdl index a17de11b..4863b74f 100644 --- a/plugins/activity/local-plugin.kdl +++ b/plugins/activity/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/activity/plugin.kdl b/plugins/activity/plugin.kdl index de4114ff..43dcf770 100644 --- a/plugins/activity/plugin.kdl +++ b/plugins/activity/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/affiliation/local-plugin.kdl b/plugins/affiliation/local-plugin.kdl index dec5449a..12045ab3 100644 --- a/plugins/affiliation/local-plugin.kdl +++ b/plugins/affiliation/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/affiliation/plugin.kdl b/plugins/affiliation/plugin.kdl index ae64e0ca..9f77cbe6 100644 --- a/plugins/affiliation/plugin.kdl +++ b/plugins/affiliation/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/churn/local-plugin.kdl b/plugins/churn/local-plugin.kdl index 8deedd1e..ed3a4344 100644 --- a/plugins/churn/local-plugin.kdl +++ b/plugins/churn/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/churn/plugin.kdl b/plugins/churn/plugin.kdl index ada4d438..ff0f5ec5 100644 --- a/plugins/churn/plugin.kdl +++ b/plugins/churn/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/entropy/local-plugin.kdl b/plugins/entropy/local-plugin.kdl index 98e57139..8c83eea0 100644 --- a/plugins/entropy/local-plugin.kdl +++ b/plugins/entropy/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/entropy/plugin.kdl b/plugins/entropy/plugin.kdl index 86244286..62b6602f 100644 --- a/plugins/entropy/plugin.kdl +++ b/plugins/entropy/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/plugins/git/Cargo.toml b/plugins/git/Cargo.toml index f3ca8dc7..ccfe540a 100644 --- a/plugins/git/Cargo.toml +++ b/plugins/git/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "git" -version = "0.2.0" +version = "0.3.0" license = "Apache-2.0" edition = "2021" repository = "https://github.com/mitre/hipcheck" @@ -12,12 +12,14 @@ clap = { version = "4.5.23", features = ["derive"] } hipcheck-sdk = { version = "0.2.0", path = "../../sdk/rust", features = [ "macros", ] } +lru = "0.12.5" gix = { version = "0.68.0", default-features = false, features = ["basic", "max-control", "zlib-stock"] } jiff = { version = "0.1.14", features = ["serde"] } log = "0.4.22" once_cell = "1.10.0" schemars = { version = "0.8.21", features = ["url"] } semver = "1.0.24" +serde_json = "1.0.134" serde = { version = "1.0.215", features = ["derive", "rc"] } tokio = { version = "1.42.0", features = ["rt"] } which = { version = "7.0.0", default-features = false } diff --git a/plugins/git/src/git.rs b/plugins/git/src/git.rs index d6e0a747..d63c2d2c 100644 --- a/plugins/git/src/git.rs +++ b/plugins/git/src/git.rs @@ -2,6 +2,8 @@ use crate::data::*; +use crate::Error; +use crate::CACHE; use anyhow::Context; use anyhow::Result; use gix::bstr::ByteSlice; @@ -18,13 +20,12 @@ use gix::traverse::commit::simple::CommitTimeOrder; use gix::ObjectId; use gix::Repository; use jiff::Timestamp; +use lru::LruCache; use std::path::Path; use std::path::PathBuf; -use std::sync::Mutex; /// used to cache all of the `RawCommit` from the last repo/HEAD combination analyzed by this -/// plugin -type GitRawCommitCache = Option<(PathBuf, ObjectId, Vec)>; +pub type GitRawCommitCache = LruCache<(PathBuf, ObjectId), Vec>; /// retrieve a handle to the git repo at this path, as well as determine the commit hash of /// HEAD @@ -108,6 +109,7 @@ where { let commit_walker = get_commit_walker(repo, head_commit)?; let commits = walk_commits(repo, commit_walker, &get_raw_commit, None)?; + // let raw_commits: GitRawCommitCache = Ok((repo_path.as_ref().to_path_buf(), head_commit, commits)) } @@ -122,24 +124,23 @@ pub fn get_all_raw_commits

(repo_path: P) -> Result> where P: AsRef, { - // used to cache all of the RawCommits from the last repository analyzed - static ALL_RAW_COMMITS: Mutex = Mutex::new(None); - let (repo, head_commit) = initialize_repo(repo_path.as_ref())?; - let mut cache = ALL_RAW_COMMITS.lock().unwrap(); + let unlocked_cache = CACHE.get().ok_or(Error::UnspecifiedQueryState)?; + let mut cache = unlocked_cache.lock().unwrap(); // if there is a value in cache, and it is the same repo with the same HEAD commit, then we can use the // cached value - if let Some(cached_value) = cache.as_ref() { - if cached_value.0 == repo_path.as_ref().to_path_buf() && cached_value.1 == head_commit { - return Ok(cached_value.2.clone()); - } + if let Some(cached_value) = cache.get_mut(&(repo_path.as_ref().to_path_buf(), head_commit)) { + return Ok(cached_value.clone()); } // otherwise the cache needs to be updated with the data from this repo_path/HEAD combination let updated_value = get_all_raw_commits_inner(&repo, repo_path.as_ref(), head_commit)?; let raw_commits = updated_value.2.clone(); - *cache = Some(get_all_raw_commits_inner(&repo, repo_path, head_commit)?); + cache.put( + (repo_path.as_ref().to_path_buf(), head_commit), + raw_commits.clone(), + ); Ok(raw_commits) } diff --git a/plugins/git/src/main.rs b/plugins/git/src/main.rs index 24c8cb45..adf59ed9 100644 --- a/plugins/git/src/main.rs +++ b/plugins/git/src/main.rs @@ -12,14 +12,22 @@ use crate::{ }, git::{ get_all_raw_commits, get_commit_diffs, get_commits_from_date, get_contributors, get_diffs, - get_latest_commit, + get_latest_commit, GitRawCommitCache, }, }; use clap::Parser; use hipcheck_sdk::{prelude::*, types::LocalGitRepo}; use jiff::Timestamp; +use lru::LruCache; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::{ + num::NonZero, + result::Result as StdResult, + sync::{Mutex, OnceLock}, +}; + +pub static CACHE: OnceLock> = OnceLock::new(); /// A locally stored git repo, with a list of additional details /// The details will vary based on the query (e.g. a date, a committer e-mail address, a commit hash) @@ -428,7 +436,27 @@ async fn commit_contributors( Ok(commit_contributors) } -#[derive(Clone, Debug)] +#[derive(Deserialize)] +struct RawConfig { + #[serde(rename = "commit-cache-size")] + commit_cache_size: Option, +} + +struct Config { + opt_commit_cache_size: Option, +} + +impl TryFrom for Config { + type Error = hipcheck_sdk::error::ConfigError; + fn try_from(value: RawConfig) -> StdResult { + let opt_commit_cache_size = value.commit_cache_size; + Ok(Config { + opt_commit_cache_size, + }) + } +} + +#[derive(Clone, Debug, Default)] struct GitPlugin; impl Plugin for GitPlugin { @@ -436,8 +464,24 @@ impl Plugin for GitPlugin { const NAME: &'static str = "git"; - fn set_config(&self, _config: Value) -> std::result::Result<(), ConfigError> { - Ok(()) + fn set_config(&self, config: Value) -> std::result::Result<(), ConfigError> { + // Deserialize and validate the config struct + let conf: Config = serde_json::from_value::(config) + .map_err(|e| ConfigError::Unspecified { + message: e.to_string(), + })? + .try_into()?; + + let cache_size = match conf.opt_commit_cache_size { + Some(s) => s, + None => 1, + }; + + CACHE + .set(Mutex::new(LruCache::new(NonZero::new(cache_size).unwrap()))) + .map_err(|_e| ConfigError::Unspecified { + message: "config was already set".to_owned(), + }) } fn default_policy_expr(&self) -> hipcheck_sdk::prelude::Result { diff --git a/plugins/identity/local-plugin.kdl b/plugins/identity/local-plugin.kdl index 6854e9ac..693d34ed 100644 --- a/plugins/identity/local-plugin.kdl +++ b/plugins/identity/local-plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="./plugins/git/local-plugin.kdl" + plugin "mitre/git" version="0.3.0" manifest="./plugins/git/local-plugin.kdl" } diff --git a/plugins/identity/plugin.kdl b/plugins/identity/plugin.kdl index 47499b5f..0c229b3d 100644 --- a/plugins/identity/plugin.kdl +++ b/plugins/identity/plugin.kdl @@ -11,5 +11,5 @@ entrypoint { } dependencies { - plugin "mitre/git" version="0.2.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" + plugin "mitre/git" version="0.3.0" manifest="https://hipcheck.mitre.org/dl/plugin/mitre/git.kdl" } diff --git a/site/content/docs/guide/plugins/mitre-git.md b/site/content/docs/guide/plugins/mitre-git.md index 4e4ff32e..3fac99a1 100644 --- a/site/content/docs/guide/plugins/mitre-git.md +++ b/site/content/docs/guide/plugins/mitre-git.md @@ -8,3 +8,9 @@ extra: Provides access to Git commit history data. Does not define a default query and can't be used as a top-level plugin in a policy file. + +## Configuration + +| Parameter | Type | Explanation | +|:--------------------|:--------|:--------------| +| `commit-cache-size` | `Integer` | Number of repositories to retain in cache. |