Skip to content

Commit

Permalink
[refactor/bugfix] Changing how the CFG edge lists are created to acco…
Browse files Browse the repository at this point in the history
…unt for heavily optimised CFG's and make it much simpler overall
  • Loading branch information
br0kej committed Sep 11, 2024
1 parent 82acf8e commit 8f2cb57
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 99 deletions.
76 changes: 39 additions & 37 deletions src/agfj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,13 @@ impl AGFJFunc {
}
pub fn create_bb_edge_list(&mut self, min_blocks: &u16) {
if self.blocks.len() > (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();

let bb_start_addrs: Vec<i64> = self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);

for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset)
bb.get_block_edges(&bb_start_addrs, &mut edge_list)
}
self.addr_idx = Some(addr_idxs);
self.addr_idx = Some(bb_start_addrs);
self.edge_list = Some(edge_list);
}
}
Expand Down Expand Up @@ -279,23 +275,28 @@ impl AGFJFunc {
feature_type: FeatureType,
inference_job: &Option<Arc<InferenceJob>>,
) {
/*
This function needs some serious sorting out.
- Need to get GPU toggle-able
- Need to use new CFG edge builder
- General refactor
*/
info!("Processing {:?}", self.name);
let full_output_path =
get_save_file_path(path, output_path, Some(".json".to_string()), None, None);
check_or_create_dir(&full_output_path);

// offset != 1 has been added to skip functions with invalid instructions
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();

let bb_start_addrs: Vec<i64> = self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let mut feature_vecs = Vec::<_>::new();
let mut feature_vec_of_vecs = Vec::<_>::new();
let min_offset = self.offset;
let max_offset = self.offset + self.size.unwrap_or(0);

for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
if inference_job.is_some() {
let inference = inference_job.as_ref().unwrap().clone();
match feature_type {
Expand Down Expand Up @@ -393,7 +394,6 @@ impl AGFJFunc {
if !Path::new(&fname_string).is_file() {
// offset != 1 has been added to skip functions with invalid instructions
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();
let mut edge_list = Vec::<(u32, u32, u32)>::new();

let mut feature_vecs: StringOrF64 = match feature_type {
Expand All @@ -411,36 +411,30 @@ impl AGFJFunc {
}
};

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);
let bb_start_addrs: Vec<i64> =
self.blocks.iter().map(|x| x.offset).collect::<Vec<_>>();

match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => {
let feature_vecs = feature_vecs.as_f64_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture);
}
debug!("Number of Feature Vecs: {}", feature_vecs.len());
assert_eq!(self.blocks.len(), feature_vecs.len())
}
FeatureType::Esil | FeatureType::Disasm | FeatureType::Pseudo => {
let feature_vecs = feature_vecs.as_string_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.get_block_edges(&bb_start_addrs, &mut edge_list);
bb.generate_bb_feature_strings(feature_vecs, feature_type, true);
}
debug!("Number of Feature Vecs: {}", feature_vecs.len())
debug!("Number of Feature Vecs: {}", feature_vecs.len());
assert_eq!(self.blocks.len(), feature_vecs.len())
}
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
Expand All @@ -454,10 +448,16 @@ impl AGFJFunc {
edge_list.is_empty(),
edge_list.len()
);
if !edge_list.is_empty() {
let mut graph = Graph::<std::string::String, u32>::from_edges(&edge_list);

Self::str_to_hex_node_idxs(&mut graph, &mut addr_idxs);
if !edge_list.is_empty() {
let mut graph = Graph::<String, u32>::from_edges(&edge_list);
Self::str_to_hex_node_idxs(&mut graph, &bb_start_addrs);
if graph.node_count() != self.blocks.len() {
debug!("Graph for {} does not have the same number of nodes as basic blocks - N: {} B: {}. This suggests \
there is something wrong with the CFG edge recovery. If this is a problem, please raise a GitHub issue!",
self.name, graph.node_count(), self.blocks.len());
return;
}

// Unpack the NodeTypes to the inner Types
if feature_type == FeatureType::Gemini {
Expand Down Expand Up @@ -577,22 +577,24 @@ impl AGFJFunc {
info!("Function {} has no edges. Skipping...", self.name)
}
} else {
info!(
debug!(
"Function {} has less than the minimum number of blocks. Skipping..",
self.name
);
}
} else {
info!(
"Function {} has already been processed. Skipping...",
self.name
)
trace!("Function has fewer basic blocks than the minimum. Skipping...");
}
} else {
debug!(
"Function {} has already been processed. Skipping...",
self.name
)
}
}

// Convert string memory address to hex / string
fn str_to_hex_node_idxs(graph: &mut Graph<String, u32>, addr_idxs: &mut [i64]) {
fn str_to_hex_node_idxs(graph: &mut Graph<String, u32>, addr_idxs: &[i64]) {
for idx in graph.node_indices() {
let i_idx = idx.index();
let hex = addr_idxs[i_idx];
Expand Down
75 changes: 13 additions & 62 deletions src/bb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,70 +446,21 @@ impl ACFJBlock {
}
num_offspring
}

// Get the edges associated with a given basic block.
// This function only considers valid edges as being
// fail, jumps or switchops that reside within the function itself.
// If there are edges that jump to another function outside of the program
// these edges are ignored.
//
// This function updates the provided mutable edge list with a three-tuple which
// represents (src, dst, weight). The weight in this case is the type of edge where
// 1 denotes jump, 2 denotes fail, 3 denotes switchop
pub fn get_block_edges(
&self,
addr_idxs: &mut Vec<i64>,
edge_list: &mut Vec<(u32, u32, u32)>,
max_offset: u64,
min_offset: u64,
) {
let mut addr: i64 = self.offset;
let mut jump: i64 = self.jump;
let mut fail: i64 = self.fail;

if addr < min_offset.try_into().unwrap() || addr >= max_offset.try_into().unwrap() {
addr = -1;
}

if jump < min_offset.try_into().unwrap() || jump >= max_offset.try_into().unwrap() {
jump = -1;
}

if fail < min_offset.try_into().unwrap() || fail >= max_offset.try_into().unwrap() {
fail = -1;
}

if addr != -1 && !addr_idxs.contains(&addr) {
addr_idxs.push(addr);
}
if jump != -1 && !addr_idxs.contains(&jump) {
addr_idxs.push(jump)
}

if fail != -1 && !addr_idxs.contains(&fail) {
addr_idxs.push(fail)
}

let addr_idx = addr_idxs.iter().position(|&p| p == addr);

if let Some(addr_idx) = addr_idx {
if jump != -1 {
let jump_idx = addr_idxs.iter().position(|&p| p == jump).unwrap();
edge_list.push((addr_idx as u32, jump_idx as u32, 1));
}

if fail != -1 {
let fail_idx = addr_idxs.iter().position(|&p| p == fail).unwrap();
edge_list.push((addr_idx as u32, fail_idx as u32, 2));
pub fn get_block_edges(&self, bb_start_addrs: &[i64], edge_list: &mut Vec<(u32, u32, u32)>) {
let offset_idx = bb_start_addrs.iter().position(|&p| p == self.offset);

if let Some(offset_idx) = offset_idx {
if self.jump != -1 {
let jump_idx = bb_start_addrs.iter().position(|&p| p == self.jump);
if let Some(jump_idx) = jump_idx {
edge_list.push((offset_idx as u32, jump_idx as u32, 1));
}
}

if self.switchop.is_some() {
for item in &self.switchop.as_ref().unwrap().cases {
if !addr_idxs.contains(&item.jump) {
addr_idxs.push(item.jump)
}
let item_addr_idx = addr_idxs.iter().position(|&p| p == item.jump).unwrap();
edge_list.push((addr_idx as u32, item_addr_idx as u32, 3));
if self.fail != -1 {
let fail_idx = bb_start_addrs.iter().position(|&p| p == self.fail);
if let Some(fail_idx) = fail_idx {
edge_list.push((offset_idx as u32, fail_idx as u32, 1));
}
}
}
Expand Down

0 comments on commit 8f2cb57

Please sign in to comment.