diff --git a/src/agfj.rs b/src/agfj.rs index 3bcf843..8d64697 100644 --- a/src/agfj.rs +++ b/src/agfj.rs @@ -2,7 +2,8 @@ use crate::bb::{ACFJBlock, FeatureType, TikNibFeaturesBB}; #[cfg(feature = "inference")] use crate::inference::InferenceJob; use crate::networkx::{ - DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode, + DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType, + PseudoNode, TiknibNode, }; use crate::utils::{average, check_or_create_dir, get_save_file_path}; use enum_as_inner::EnumAsInner; @@ -110,6 +111,29 @@ impl AGFJFunc { } } + pub fn get_psuedo_function_string( + &self, + min_blocks: &u16, + reg_norm: bool, + ) -> Option<(String, String)> { + let mut psuedo_function = Vec::::new(); + if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 { + for bb in &self.blocks { + let psuedo: Vec = bb.get_psuedo_bb(reg_norm); + for ins in psuedo.iter() { + if !ins.is_empty() { + let split: Vec = ins.split(',').map(|s| s.to_string()).collect(); + let split_joined = split.join(" "); + psuedo_function.push(split_joined); + } + } + } + let joined = psuedo_function.join(" "); + Some((self.name.clone(), joined)) + } else { + None + } + } pub fn create_bb_edge_list(&mut self, min_blocks: &u16) { if self.blocks.len() > (*min_blocks).into() && self.blocks[0].offset != 1 { let mut addr_idxs = Vec::::new(); @@ -340,7 +364,13 @@ impl AGFJFunc { feature_type: FeatureType, architecture: &String, ) { - let full_output_path = get_save_file_path(path, output_path, None, None, None); + let full_output_path = get_save_file_path( + path, + output_path, + None, + Some(feature_type.to_string()), + None, + ); check_or_create_dir(&full_output_path); let file_name = path.file_name().unwrap(); let binding = file_name.to_string_lossy().to_string(); @@ -371,12 +401,14 @@ impl AGFJFunc { | FeatureType::Gemini | FeatureType::DiscovRE | FeatureType::DGIS => StringOrF64::F64(Vec::new()), - FeatureType::Esil | FeatureType::Disasm => StringOrF64::String(Vec::new()), + FeatureType::Esil + | FeatureType::Disasm + | FeatureType::Pseudo + | FeatureType::Pcode => StringOrF64::String(Vec::new()), FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => { info!("Invalid Feature Type. Skipping.."); return; } - FeatureType::Pcode => StringOrF64::String(Vec::new()), }; let min_offset: u64 = self.offset; @@ -397,7 +429,7 @@ impl AGFJFunc { bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture); } } - FeatureType::Esil | FeatureType::Disasm => { + FeatureType::Esil | FeatureType::Disasm | FeatureType::Pseudo => { let feature_vecs = feature_vecs.as_string_mut().unwrap(); for bb in &self.blocks { bb.get_block_edges( @@ -408,6 +440,7 @@ impl AGFJFunc { ); bb.generate_bb_feature_strings(feature_vecs, feature_type, true); } + debug!("Number of Feature Vecs: {}", feature_vecs.len()) } FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => { info!("Invalid Feature Type. Skipping.."); @@ -416,6 +449,11 @@ impl AGFJFunc { _ => {} }; + debug!( + "Edge List Empty: {} Edge List Dims: {}", + edge_list.is_empty(), + edge_list.len() + ); if !edge_list.is_empty() { let mut graph = Graph::::from_edges(&edge_list); @@ -519,21 +557,37 @@ impl AGFJFunc { &networkx_graph_inners, ) .expect("Unable to write JSON"); + } else if feature_type == FeatureType::Pseudo { + let networkx_graph: NetworkxDiGraph = + NetworkxDiGraph::::from(( + &graph, + feature_vecs.as_string().unwrap(), + feature_type, + )); + + let networkx_graph_inners: NetworkxDiGraph = + NetworkxDiGraph::::from(networkx_graph); + info!("Saving to JSON.."); + serde_json::to_writer( + &File::create(fname_string).expect("Failed to create writer"), + &networkx_graph_inners, + ) + .expect("Unable to write JSON"); + } else { + info!("Function {} has no edges. Skipping...", self.name) } } else { - info!("Function {} has no edges. Skipping...", self.name) + info!( + "Function {} has less than the minimum number of blocks. Skipping..", + self.name + ); } } else { info!( - "Function {} has less than the minimum number of blocks. Skipping..", + "Function {} has already been processed. Skipping...", self.name - ); + ) } - } else { - info!( - "Function {} has already been processed. Skipping...", - self.name - ) } } diff --git a/src/bb.rs b/src/bb.rs index 24bf818..39ba083 100644 --- a/src/bb.rs +++ b/src/bb.rs @@ -7,6 +7,7 @@ use serde_aux::prelude::*; use serde_json::Value; use serde_with::{serde_as, DefaultOnError}; use std::collections::HashMap; +use std::fmt; use std::string::String; #[cfg(feature = "inference")] use std::sync::Arc; @@ -25,6 +26,26 @@ pub enum FeatureType { Encoded, Invalid, Pcode, + Pseudo, +} + +impl fmt::Display for FeatureType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let feature_type_str = match self { + FeatureType::Gemini => "gemini", + FeatureType::DiscovRE => "discovre", + FeatureType::DGIS => "dgis", + FeatureType::Tiknib => "tiknib", + FeatureType::Disasm => "disasm", + FeatureType::Esil => "esil", + FeatureType::ModelEmbedded => "embedded", + FeatureType::Encoded => "encoded", + FeatureType::Invalid => "invalid", + FeatureType::Pcode => "pcode", + FeatureType::Pseudo => "pseudo", + }; + write!(f, "{}", feature_type_str) + } } #[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Copy)] @@ -236,9 +257,9 @@ impl ACFJBlock { let feature_vector: Vec = match feature_type { FeatureType::Disasm => self.get_disasm_bb(normalise), FeatureType::Esil => self.get_esil_bb(normalise), + FeatureType::Pseudo => self.get_psuedo_bb(normalise), _ => unreachable!(), }; - if feature_vector.is_empty() { error!("Empty feature vector. This means that the feature type is wrong!") } else { @@ -262,7 +283,7 @@ impl ACFJBlock { for ins in self.ops.iter() { if ins.r#type != "invalid" { let opcode = ins - .opcode + .disasm .as_ref() .unwrap() .split_whitespace() @@ -327,7 +348,7 @@ impl ACFJBlock { for ins in self.ops.iter() { if ins.r#type != "invalid" { let opcode = ins - .opcode + .disasm .as_ref() .unwrap() .split_whitespace() @@ -522,6 +543,18 @@ impl ACFJBlock { disasm_ins } + pub fn get_psuedo_bb(&self, reg_norm: bool) -> Vec { + let mut psuedo_ins: Vec = Vec::new(); + for op in &self.ops { + if op.opcode.is_some() && op.opcode.as_ref().unwrap().len() > 1 { + let opcode_single = &op.opcode.as_ref().unwrap(); + let normd = normalise_disasm_simple(opcode_single, reg_norm); + psuedo_ins.push((*normd).to_string()); + } + } + psuedo_ins + } + pub fn get_ins(&self, reg_norm: bool) -> Vec { let mut disasm_ins: Vec = Vec::new(); for op in &self.ops { diff --git a/src/extract.rs b/src/extract.rs index edfad28..d0a3733 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -897,13 +897,18 @@ impl FileToBeProcessed { debug!("Creating r2 handle with debugging"); R2PipeSpawnOptions { exepath: "radare2".to_owned(), - args: vec!["-e bin.cache=true", "-e log.level=0"], + args: vec!["-e bin.cache=true", "-e log.level=0", "-e asm.pseudo=true"], } } else { debug!("Creating r2 handle without debugging"); R2PipeSpawnOptions { exepath: "radare2".to_owned(), - args: vec!["-e bin.cache=true", "-e log.level=1", "-2"], + args: vec![ + "-e bin.cache=true", + "-e log.level=1", + "-2", + "-e asm.pseudo=true", + ], } }; diff --git a/src/main.rs b/src/main.rs index f9e3a59..0a8f35c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,7 +114,7 @@ enum GenerateSubCommands { output_path: PathBuf, /// The type of features to generate per basic block (node) - #[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil", "pcode"]) + #[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil", "pcode", "pseudo"]) .map(|s| s.parse::().unwrap()),)] feature_type: Option, @@ -461,6 +461,7 @@ fn main() { #[cfg(feature = "inference")] "embed" => FeatureType::ModelEmbedded, "pcode" => FeatureType::Pcode, + "pseudo" => FeatureType::Pseudo, _ => FeatureType::Invalid, }; @@ -473,6 +474,7 @@ fn main() { || feature_vec_type == FeatureType::Tiknib || feature_vec_type == FeatureType::Disasm || feature_vec_type == FeatureType::Esil + || feature_vec_type == FeatureType::Pseudo { info!( "Creating graphs with {:?} feature vectors.", diff --git a/src/networkx.rs b/src/networkx.rs index e4f8a33..84b55b2 100644 --- a/src/networkx.rs +++ b/src/networkx.rs @@ -50,6 +50,7 @@ pub enum NodeType { Disasm(DisasmNode), Esil(EsilNode), PCode(PCodeNode), + Pseudo(PseudoNode), } #[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize, EnumAsInner)] @@ -110,6 +111,21 @@ impl From<(i64, &Vec)> for EsilNode { } } +#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PseudoNode { + pub id: i64, + pub features: Vec, +} + +impl From<(i64, &Vec)> for PseudoNode { + fn from(src: (i64, &Vec)) -> PseudoNode { + PseudoNode { + id: src.0, + features: src.1.to_owned(), + } + } +} + #[derive(Copy, Default, Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct TiknibNode { pub id: i64, @@ -434,7 +450,10 @@ impl From<(&Graph, &Vec>, FeatureType)> for NetworkxDiG Some(NodeType::Disasm(DisasmNode::from((i as i64, node_vector)))) } FeatureType::Esil => Some(NodeType::Esil(EsilNode::from((i as i64, node_vector)))), - _ => None, + FeatureType::Pseudo => { + Some(NodeType::Pseudo(PseudoNode::from((i as i64, node_vector)))) + } + _ => todo!(), }; if let Some(node) = node { nodes.push(node); @@ -641,6 +660,25 @@ impl From> for NetworkxDiGraph { } } +impl From> for NetworkxDiGraph { + fn from(src: NetworkxDiGraph) -> NetworkxDiGraph { + let inner_nodes_types: Vec = src + .clone() + .nodes + .into_iter() + .map(|el| el.as_pseudo().unwrap().clone()) + .collect(); + + NetworkxDiGraph { + adjacency: src.adjacency, + directed: src.directed, + graph: vec![], + multigraph: false, + nodes: inner_nodes_types, + } + } +} + #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct PCodeNode { pub id: u64,