diff --git a/src/extract.rs b/src/extract.rs
index 928233a..7cd46aa 100644
--- a/src/extract.rs
+++ b/src/extract.rs
@@ -18,7 +18,6 @@ use std::env;
 
 use std::fs;
 use std::fs::File;
-
 use std::path::{Path, PathBuf};
 use walkdir::WalkDir;
 
@@ -43,6 +42,7 @@ pub enum ExtractionJobType {
     PCodeBB,
     LocalVariableXrefs,
     GlobalStrings,
+    FunctionBytes,
 }
 
 #[derive(Debug)]
@@ -327,6 +327,13 @@ pub struct StringEntry {
     pub string: String,
 }
 
+/// Raw bytes recovered for a single function.
+#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct FuncBytes {
+    /// Bytes decoded from the `\xNN` escapes in radare2's `pcs` output.
+    pub bytes: Vec<u8>,
+}
+
 impl ExtractionJob {
     pub fn new(
         input_path: &PathBuf,
@@ -363,6 +370,7 @@ impl ExtractionJob {
                 "pcode-bb" => Ok(ExtractionJobType::PCodeBB),
                 "localvar-xrefs" => Ok(ExtractionJobType::LocalVariableXrefs),
                 "strings" => Ok(ExtractionJobType::GlobalStrings),
+                "bytes" => Ok(ExtractionJobType::FunctionBytes),
                 _ => bail!("Incorrect command type - got {}", mode),
             }
         }
@@ -738,7 +746,66 @@ impl FileToBeProcessed {
         }
     }
 
+    /// Dumps the raw bytes of every function radare2 reports for this file.
+    ///
+    /// Each function is written to its own `.bin` file via `write_to_bin`. A failure
+    /// to read or write one function is logged and skipped rather than aborting.
+    pub fn extract_function_bytes(&self) {
+        info!("Starting function bytes extraction");
+        let mut r2p = self.setup_r2_pipe();
+
+        match self.get_function_name_list(&mut r2p) {
+            Ok(functions) => {
+                for function in functions.iter() {
+                    debug!(
+                        "Function Name: {} Offset: {} Size: {}",
+                        function.name, function.offset, function.size
+                    );
+                    match self.get_bytes_function(function.offset, &mut r2p) {
+                        Ok(func_bytes) => {
+                            if let Err(e) = self.write_to_bin(&function.name, &func_bytes.bytes) {
+                                error!("Failed to write bytes for {}: {}", function.name, e);
+                            }
+                        }
+                        Err(e) => error!("Failed to read bytes for {}: {:?}", function.name, e),
+                    }
+                }
+                info!("Function bytes successfully extracted");
+            }
+            Err(_) => error!(
+                "Failed to extract function bytes - Error in r2 extraction for {:?}",
+                self.file_path
+            ),
+        }
+
+        // Close the pipe on the failure path too, not only after a successful run.
+        r2p.close();
+        info!("r2p closed");
+    }
+
     // r2 commands to structs
+    /// Reads the bytes at `function_addr` through radare2 and decodes them.
+    ///
+    /// Runs `pcs`, removes any double quotes from its output and decodes the
+    /// `\xNN` escapes that remain with `parse_hex_escapes`.
+    ///
+    /// # Errors
+    /// Returns the error from the r2 pipe if the `pcs` command fails.
+    fn get_bytes_function(
+        &self,
+        function_addr: u64,
+        r2p: &mut R2Pipe,
+    ) -> Result<FuncBytes, r2pipe::Error> {
+        Self::go_to_address(r2p, function_addr);
+
+        // NOTE(review): `pcs` is given no length, so the number of bytes returned is
+        // r2's default rather than the function's size - confirm this is intended.
+        let raw_output = r2p.cmd(format!("pcs @ {}", function_addr).as_str())?;
+        let bytes = crate::utils::parse_hex_escapes(raw_output.replace('"', ""));
+
+        Ok(FuncBytes { bytes })
+    }
+
     fn get_ghidra_pcode_function(
         &self,
         function_addr: u64,
@@ -899,8 +966,30 @@ impl FileToBeProcessed {
             .unwrap_or_else(|_| panic!("the world is ending: {:?}", output_filepath));
     }
 
+    /// Writes `func_bytes` to `<output_path>/<input file name>/<function_name>.bin`.
+    ///
+    /// Path separators (`/` and `\`) in `function_name` are replaced with `_` so the
+    /// name is used as a single file name rather than a nested or parent-relative path.
+    ///
+    /// # Errors
+    /// Fails if the input path has no file name, or if the directory or file cannot
+    /// be written.
+    fn write_to_bin(&self, function_name: &str, func_bytes: &[u8]) -> Result<()> {
+        let binary_name = match self.file_path.file_name() {
+            Some(name) => name,
+            None => bail!("Unable to get filename for {:?}", self.file_path),
+        };
+
+        let safe_name = function_name.replace(|c: char| matches!(c, '/' | '\\'), "_");
+        let output_dir = Path::new(&self.output_path).join(binary_name);
+        fs::create_dir_all(&output_dir)?;
+
+        fs::write(output_dir.join(format!("{}.bin", safe_name)), func_bytes)?;
+        Ok(())
+    }
+
     fn go_to_address(r2p: &mut R2Pipe, function_addr: u64) {
-        r2p.cmd(format!("s @ {}", function_addr).as_str())
+        r2p.cmd(format!("s {}", function_addr).as_str())
             .expect("failed to seek addr");
     }
 
diff --git a/src/main.rs b/src/main.rs
index e3e2384..8d5f72c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -281,7 +281,7 @@ enum Commands {
         output_dir: PathBuf,
 
         /// The extraction mode
-        #[arg(short, long, value_name = "EXTRACT_MODE", value_parser = clap::builder::PossibleValuesParser::new(["finfo", "reg", "cfg", "func-xrefs","cg", "decomp", "pcode-func", "pcode-bb", "localvar-xrefs", "strings"])
+        #[arg(short, long, value_name = "EXTRACT_MODE", value_parser = clap::builder::PossibleValuesParser::new(["finfo", "reg", "cfg", "func-xrefs","cg", "decomp", "pcode-func", "pcode-bb", "localvar-xrefs", "strings", "bytes"])
         .map(|s| s.parse::<String>().unwrap()),)]
         mode: String,
 
@@ -1126,7 +1126,17 @@ fn main() {
                     .par_iter()
                     .progress()
                     .for_each(|path| path.extract_local_variable_xrefs());
-            }
+            } else if job.job_type == ExtractionJobType::GlobalStrings {
+                job.files_to_be_processed
+                    .par_iter()
+                    .progress()
+                    .for_each(|path| path.extract_global_strings());
+            } else if job.job_type == ExtractionJobType::FunctionBytes {
+                job.files_to_be_processed
+                    .par_iter()
+                    .progress()
+                    .for_each(|path| path.extract_function_bytes());
+            };
         } else if job.input_path_type == PathType::File {
             info!("Single file found");
             if job.job_type == ExtractionJobType::CFG {
@@ -1155,6 +1165,8 @@ fn main() {
                 job.files_to_be_processed[0].extract_local_variable_xrefs()
             } else if job.job_type == ExtractionJobType::GlobalStrings {
                 job.files_to_be_processed[0].extract_global_strings()
+            } else if job.job_type == ExtractionJobType::FunctionBytes {
+                job.files_to_be_processed[0].extract_function_bytes()
             } else {
                 error!("Unsupported ExtractionJobType of {:?}", job.job_type)
             }
diff --git a/src/utils.rs b/src/utils.rs
index 7f30dc1..3ce29fc 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -109,6 +109,40 @@ pub fn check_or_create_dir(full_output_path: &PathBuf) {
 pub fn average(numbers: Vec<f32>) -> f32 {
     numbers.iter().sum::<f32>() / numbers.len() as f32
 }
+
+/// Decode the `\xNN` escapes in radare2 string output into raw bytes.
+///
+/// Input that is not part of a well-formed `\xNN` escape (exactly two hex
+/// digits) is skipped. A malformed escape is dropped without consuming the
+/// characters after it, so an escape that follows directly is still decoded.
+pub fn parse_hex_escapes(s: String) -> Vec<u8> {
+    let raw = s.as_bytes();
+    // Every decoded byte consumes four input bytes (`\xNN`).
+    let mut bytes = Vec::with_capacity(raw.len() / 4);
+    let mut i = 0;
+
+    while i < raw.len() {
+        if raw[i] == b'\\' && raw.get(i + 1) == Some(&b'x') {
+            let decoded = raw
+                .get(i + 2..i + 4)
+                .filter(|digits| digits.iter().all(u8::is_ascii_hexdigit))
+                .and_then(|digits| std::str::from_utf8(digits).ok())
+                .and_then(|digits| u8::from_str_radix(digits, 16).ok());
+            match decoded {
+                Some(byte) => {
+                    bytes.push(byte);
+                    i += 4;
+                }
+                // Malformed escape: step past the `\x` prefix only.
+                None => i += 2,
+            }
+        } else {
+            i += 1;
+        }
+    }
+    bytes
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;