Skip to content

Commit

Permalink
Merge pull request #17 from br0kej/dev
Browse files Browse the repository at this point in the history
Adding support for Pseudo Code CFGs
  • Loading branch information
br0kej authored Aug 20, 2024
2 parents bc146e0 + f783404 commit 0de0ab2
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 20 deletions.
80 changes: 67 additions & 13 deletions src/agfj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::bb::{ACFJBlock, FeatureType, TikNibFeaturesBB};
#[cfg(feature = "inference")]
use crate::inference::InferenceJob;
use crate::networkx::{
DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode,
DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType,
PseudoNode, TiknibNode,
};
use crate::utils::{average, check_or_create_dir, get_save_file_path};
use enum_as_inner::EnumAsInner;
Expand Down Expand Up @@ -110,6 +111,29 @@ impl AGFJFunc {
}
}

/// Builds one space-separated pseudo code string covering every basic block of the function.
///
/// Each block's instructions come from `get_psuedo_bb`; empty instructions are dropped,
/// commas inside an instruction are replaced by spaces, and all instructions are then
/// joined with a single space.
///
/// # Arguments
/// * `min_blocks` - minimum number of basic blocks (inclusive) the function must have.
/// * `reg_norm` - forwarded unchanged to `get_psuedo_bb`.
///
/// # Returns
/// `Some((function_name, pseudo_string))`, or `None` when the function has no blocks,
/// has fewer than `min_blocks` blocks, or its first block has `offset == 1`
/// (NOTE(review): offset 1 looks like a sentinel for an invalid function - confirm).
pub fn get_psuedo_function_string(
    &self,
    min_blocks: &u16,
    reg_norm: bool,
) -> Option<(String, String)> {
    // `first()` instead of `blocks[0]`: with `min_blocks == 0` an empty block list passes
    // the length check, and indexing would panic.
    let first_block_ok = self.blocks.first().map_or(false, |bb| bb.offset != 1);
    if self.blocks.len() < usize::from(*min_blocks) || !first_block_ok {
        return None;
    }

    let mut psuedo_function = Vec::<String>::new();
    for bb in &self.blocks {
        for ins in bb.get_psuedo_bb(reg_norm) {
            if !ins.is_empty() {
                // Same result as splitting on ',' and re-joining with " ".
                psuedo_function.push(ins.replace(',', " "));
            }
        }
    }

    Some((self.name.clone(), psuedo_function.join(" ")))
}
pub fn create_bb_edge_list(&mut self, min_blocks: &u16) {
if self.blocks.len() > (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();
Expand Down Expand Up @@ -340,7 +364,13 @@ impl AGFJFunc {
feature_type: FeatureType,
architecture: &String,
) {
let full_output_path = get_save_file_path(path, output_path, None, None, None);
let full_output_path = get_save_file_path(
path,
output_path,
None,
Some(feature_type.to_string()),
None,
);
check_or_create_dir(&full_output_path);
let file_name = path.file_name().unwrap();
let binding = file_name.to_string_lossy().to_string();
Expand Down Expand Up @@ -371,12 +401,14 @@ impl AGFJFunc {
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => StringOrF64::F64(Vec::new()),
FeatureType::Esil | FeatureType::Disasm => StringOrF64::String(Vec::new()),
FeatureType::Esil
| FeatureType::Disasm
| FeatureType::Pseudo
| FeatureType::Pcode => StringOrF64::String(Vec::new()),
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
return;
}
FeatureType::Pcode => StringOrF64::String(Vec::new()),
};

let min_offset: u64 = self.offset;
Expand All @@ -397,7 +429,7 @@ impl AGFJFunc {
bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture);
}
}
FeatureType::Esil | FeatureType::Disasm => {
FeatureType::Esil | FeatureType::Disasm | FeatureType::Pseudo => {
let feature_vecs = feature_vecs.as_string_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
Expand All @@ -408,6 +440,7 @@ impl AGFJFunc {
);
bb.generate_bb_feature_strings(feature_vecs, feature_type, true);
}
debug!("Number of Feature Vecs: {}", feature_vecs.len())
}
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
Expand All @@ -416,6 +449,11 @@ impl AGFJFunc {
_ => {}
};

debug!(
"Edge List Empty: {} Edge List Dims: {}",
edge_list.is_empty(),
edge_list.len()
);
if !edge_list.is_empty() {
let mut graph = Graph::<std::string::String, u32>::from_edges(&edge_list);

Expand Down Expand Up @@ -519,21 +557,37 @@ impl AGFJFunc {
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Pseudo {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_string().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<PseudoNode> =
NetworkxDiGraph::<PseudoNode>::from(networkx_graph);
info!("Saving to JSON..");
serde_json::to_writer(
&File::create(fname_string).expect("Failed to create writer"),
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else {
info!("Function {} has no edges. Skipping...", self.name)
}
} else {
info!("Function {} has no edges. Skipping...", self.name)
info!(
"Function {} has less than the minimum number of blocks. Skipping..",
self.name
);
}
} else {
info!(
"Function {} has less than the minimum number of blocks. Skipping..",
"Function {} has already been processed. Skipping...",
self.name
);
)
}
} else {
info!(
"Function {} has already been processed. Skipping...",
self.name
)
}
}

Expand Down
39 changes: 36 additions & 3 deletions src/bb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use serde_aux::prelude::*;
use serde_json::Value;
use serde_with::{serde_as, DefaultOnError};
use std::collections::HashMap;
use std::fmt;
use std::string::String;
#[cfg(feature = "inference")]
use std::sync::Arc;
Expand All @@ -25,6 +26,26 @@ pub enum FeatureType {
Encoded,
Invalid,
Pcode,
Pseudo,
}

/// Renders a `FeatureType` as a short lowercase name.
///
/// Width, fill and alignment flags on the formatter are not applied; the bare
/// name is always written.
impl fmt::Display for FeatureType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            FeatureType::Gemini => "gemini",
            FeatureType::DiscovRE => "discovre",
            FeatureType::DGIS => "dgis",
            FeatureType::Tiknib => "tiknib",
            FeatureType::Disasm => "disasm",
            FeatureType::Esil => "esil",
            FeatureType::ModelEmbedded => "embedded",
            FeatureType::Encoded => "encoded",
            FeatureType::Invalid => "invalid",
            FeatureType::Pcode => "pcode",
            FeatureType::Pseudo => "pseudo",
        })
    }
}

#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Copy)]
Expand Down Expand Up @@ -236,9 +257,9 @@ impl ACFJBlock {
let feature_vector: Vec<String> = match feature_type {
FeatureType::Disasm => self.get_disasm_bb(normalise),
FeatureType::Esil => self.get_esil_bb(normalise),
FeatureType::Pseudo => self.get_psuedo_bb(normalise),
_ => unreachable!(),
};

if feature_vector.is_empty() {
error!("Empty feature vector. This means that the feature type is wrong!")
} else {
Expand All @@ -262,7 +283,7 @@ impl ACFJBlock {
for ins in self.ops.iter() {
if ins.r#type != "invalid" {
let opcode = ins
.opcode
.disasm
.as_ref()
.unwrap()
.split_whitespace()
Expand Down Expand Up @@ -327,7 +348,7 @@ impl ACFJBlock {
for ins in self.ops.iter() {
if ins.r#type != "invalid" {
let opcode = ins
.opcode
.disasm
.as_ref()
.unwrap()
.split_whitespace()
Expand Down Expand Up @@ -522,6 +543,18 @@ impl ACFJBlock {
disasm_ins
}

/// Collects the normalised pseudo code instructions of this basic block.
///
/// Ops whose `opcode` is `None`, or whose opcode string is one byte or shorter,
/// are skipped. Every remaining opcode is passed through `normalise_disasm_simple`
/// together with `reg_norm`, and the results are returned in op order.
pub fn get_psuedo_bb(&self, reg_norm: bool) -> Vec<String> {
    self.ops
        .iter()
        .filter_map(|op| op.opcode.as_ref())
        .filter(|opcode| opcode.len() > 1)
        .map(|opcode| {
            let normalised = normalise_disasm_simple(opcode, reg_norm);
            (*normalised).to_string()
        })
        .collect()
}

pub fn get_ins(&self, reg_norm: bool) -> Vec<String> {
let mut disasm_ins: Vec<String> = Vec::new();
for op in &self.ops {
Expand Down
9 changes: 7 additions & 2 deletions src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -897,13 +897,18 @@ impl FileToBeProcessed {
debug!("Creating r2 handle with debugging");
R2PipeSpawnOptions {
exepath: "radare2".to_owned(),
args: vec!["-e bin.cache=true", "-e log.level=0"],
args: vec!["-e bin.cache=true", "-e log.level=0", "-e asm.pseudo=true"],
}
} else {
debug!("Creating r2 handle without debugging");
R2PipeSpawnOptions {
exepath: "radare2".to_owned(),
args: vec!["-e bin.cache=true", "-e log.level=1", "-2"],
args: vec![
"-e bin.cache=true",
"-e log.level=1",
"-2",
"-e asm.pseudo=true",
],
}
};

Expand Down
4 changes: 3 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ enum GenerateSubCommands {
output_path: PathBuf,

/// The type of features to generate per basic block (node)
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil", "pcode"])
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil", "pcode", "pseudo"])
.map(|s| s.parse::<String>().unwrap()),)]
feature_type: Option<String>,

Expand Down Expand Up @@ -461,6 +461,7 @@ fn main() {
#[cfg(feature = "inference")]
"embed" => FeatureType::ModelEmbedded,
"pcode" => FeatureType::Pcode,
"pseudo" => FeatureType::Pseudo,
_ => FeatureType::Invalid,
};

Expand All @@ -473,6 +474,7 @@ fn main() {
|| feature_vec_type == FeatureType::Tiknib
|| feature_vec_type == FeatureType::Disasm
|| feature_vec_type == FeatureType::Esil
|| feature_vec_type == FeatureType::Pseudo
{
info!(
"Creating graphs with {:?} feature vectors.",
Expand Down
40 changes: 39 additions & 1 deletion src/networkx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub enum NodeType {
Disasm(DisasmNode),
Esil(EsilNode),
PCode(PCodeNode),
Pseudo(PseudoNode),
}

#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize, EnumAsInner)]
Expand Down Expand Up @@ -110,6 +111,21 @@ impl From<(i64, &Vec<String>)> for EsilNode {
}
}

/// Graph node holding string features for the `Pseudo` feature type.
///
/// Serialisable via serde (derives `Serialize`/`Deserialize`).
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PseudoNode {
/// Node identifier (presumably the node's index within its graph - confirm against the caller).
pub id: i64,
/// Feature strings attached to this node.
pub features: Vec<String>,
}

impl From<(i64, &Vec<String>)> for PseudoNode {
fn from(src: (i64, &Vec<String>)) -> PseudoNode {
PseudoNode {
id: src.0,
features: src.1.to_owned(),
}
}
}

#[derive(Copy, Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TiknibNode {
pub id: i64,
Expand Down Expand Up @@ -434,7 +450,10 @@ impl From<(&Graph<String, u32>, &Vec<Vec<String>>, FeatureType)> for NetworkxDiG
Some(NodeType::Disasm(DisasmNode::from((i as i64, node_vector))))
}
FeatureType::Esil => Some(NodeType::Esil(EsilNode::from((i as i64, node_vector)))),
_ => None,
FeatureType::Pseudo => {
Some(NodeType::Pseudo(PseudoNode::from((i as i64, node_vector))))
}
_ => todo!(),
};
if let Some(node) = node {
nodes.push(node);
Expand Down Expand Up @@ -641,6 +660,25 @@ impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<EsilNode> {
}
}

/// Converts a graph of generic `NodeType` nodes into a graph of concrete `PseudoNode`s.
///
/// `adjacency` and `directed` are carried over from `src`; `graph` is reset to an
/// empty vector and `multigraph` is always `false`.
///
/// # Panics
/// Panics if any node in `src` is not the `NodeType::Pseudo` variant.
impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<PseudoNode> {
    fn from(src: NetworkxDiGraph<NodeType>) -> NetworkxDiGraph<PseudoNode> {
        // Borrow the node list rather than cloning the whole graph: only the inner
        // `PseudoNode` values need copying, and `adjacency` is moved out below.
        let inner_nodes_types: Vec<PseudoNode> = src
            .nodes
            .iter()
            .map(|el| {
                el.as_pseudo()
                    .expect("every node of a pseudo graph must be NodeType::Pseudo")
                    .clone()
            })
            .collect();

        NetworkxDiGraph {
            adjacency: src.adjacency,
            directed: src.directed,
            graph: vec![],
            multigraph: false,
            nodes: inner_nodes_types,
        }
    }
}

#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PCodeNode {
pub id: u64,
Expand Down

0 comments on commit 0de0ab2

Please sign in to comment.