Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLRU added for git plugin #750

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions plugins/git/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ clap = { version = "4.5.23", features = ["derive"] }
hipcheck-sdk = { version = "0.2.0", path = "../../sdk/rust", features = [
"macros",
] }
clru = "0.6.2"
gix = { version = "0.68.0", default-features = false, features = ["basic", "max-control", "zlib-stock"] }
jiff = { version = "0.1.14", features = ["serde"] }
log = "0.4.22"
Expand Down
24 changes: 14 additions & 10 deletions plugins/git/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::data::*;

use anyhow::Context;
use anyhow::Result;
use clru::CLruCache;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that the clru crate is better for this than lru? https://docs.rs/lru/latest/lru/

use gix::bstr::ByteSlice;
use gix::diff::blob::intern::InternedInput;
use gix::diff::blob::sink::Counter;
Expand All @@ -18,13 +19,17 @@ use gix::traverse::commit::simple::CommitTimeOrder;
use gix::ObjectId;
use gix::Repository;
use jiff::Timestamp;
use std::num::NonZero;
use std::path::Path;
use std::path::PathBuf;
use std::sync::LazyLock;
use std::sync::Mutex;

/// used to cache all of the `RawCommit`s from the last repo/HEAD combination analyzed by this
/// plugin
type GitRawCommitCache = Option<(PathBuf, ObjectId, Vec<RawCommit>)>;
type GitRawCommitCache = CLruCache<(PathBuf, ObjectId), Vec<RawCommit>>;
static CACHE: LazyLock<Mutex<GitRawCommitCache>> =
LazyLock::new(|| Mutex::new(CLruCache::new(NonZero::new(1).unwrap())));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point of this PR is to make the cache capacity (the `1` you have here) a configurable parameter.


/// retrieve a handle to the git repo at this path, as well as determine the commit hash of
/// HEAD
Expand Down Expand Up @@ -108,6 +113,7 @@ where
{
let commit_walker = get_commit_walker(repo, head_commit)?;
let commits = walk_commits(repo, commit_walker, &get_raw_commit, None)?;
// let raw_commits: GitRawCommitCache =
Ok((repo_path.as_ref().to_path_buf(), head_commit, commits))
}

Expand All @@ -122,24 +128,22 @@ pub fn get_all_raw_commits<P>(repo_path: P) -> Result<Vec<RawCommit>>
where
P: AsRef<Path>,
{
// used to cache all of the RawCommits from the last repository analyzed
static ALL_RAW_COMMITS: Mutex<GitRawCommitCache> = Mutex::new(None);

let (repo, head_commit) = initialize_repo(repo_path.as_ref())?;
let mut cache = ALL_RAW_COMMITS.lock().unwrap();
let mut cache = CACHE.lock().unwrap();

// if there is a value in cache, and it is the same repo with the same HEAD commit, then we can use the
// cached value
if let Some(cached_value) = cache.as_ref() {
if cached_value.0 == repo_path.as_ref().to_path_buf() && cached_value.1 == head_commit {
return Ok(cached_value.2.clone());
}
if let Some(cached_value) = cache.get_mut(&(repo_path.as_ref().to_path_buf(), head_commit)) {
return Ok(cached_value.clone());
}

// otherwise the cache needs to be updated with the data from this repo_path/HEAD combination
let updated_value = get_all_raw_commits_inner(&repo, repo_path.as_ref(), head_commit)?;
let raw_commits = updated_value.2.clone();
*cache = Some(get_all_raw_commits_inner(&repo, repo_path, head_commit)?);
cache.put(
(repo_path.as_ref().to_path_buf(), head_commit),
raw_commits.clone(),
);
Ok(raw_commits)
}

Expand Down
Loading