diff --git a/Cargo.toml b/Cargo.toml
index b118eb37..32f611f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ keywords = ["format", "parse", "encode"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+lowcharts = "0.5"
 anyhow = "1.0"
 clap = { version = "4.0.17", features = ["cargo"] }
 colored = "2.0.0"
diff --git a/src/bin/ion/commands/beta/count.rs b/src/bin/ion/commands/beta/analyze/count.rs
similarity index 100%
rename from src/bin/ion/commands/beta/count.rs
rename to src/bin/ion/commands/beta/analyze/count.rs
diff --git a/src/bin/ion/commands/beta/analyze/depth.rs b/src/bin/ion/commands/beta/analyze/depth.rs
new file mode 100644
index 00000000..26a8e12e
--- /dev/null
+++ b/src/bin/ion/commands/beta/analyze/depth.rs
@@ -0,0 +1,76 @@
+use std::fs::File;
+use crate::commands::{IonCliCommand, WithIonCliArgument};
+use anyhow::{bail, Context, Result};
+use clap::{ArgMatches, Command};
+use ion_rs::ElementReader;
+use ion_rs::Element;
+use ion_rs::{IonReader, IonType, Reader, ReaderBuilder};
+
+/// `beta analyze depth`: reports the deepest nesting level found in each input file.
+pub struct DepthCommand;
+
+impl IonCliCommand for DepthCommand {
+    fn name(&self) -> &'static str {
+        "depth"
+    }
+
+    fn about(&self) -> &'static str {
+        "Prints the maximum depth of the input ion stream."
+    }
+
+    fn configure_args(&self, command: Command) -> Command {
+        command.with_input()
+    }
+
+    fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
+        if let Some(input_file_iter) = args.get_many::<String>("input") {
+            for input_file in input_file_iter {
+                let file = File::open(input_file)
+                    .with_context(|| format!("Could not open file '{}'", input_file))?;
+                let mut reader = ReaderBuilder::new().build(file)?;
+                get_depth(&mut reader)?;
+            }
+        } else {
+            bail!("this command does not yet support reading from STDIN")
+        };
+        Ok(())
+    }
+}
+
+/// Reads every top-level value from `reader` and prints the greatest nesting depth seen.
+///
+/// Returns an error (instead of panicking) if a value cannot be read from the stream.
+fn get_depth(reader: &mut Reader) -> Result<()> {
+    let mut max_depth: usize = 0;
+    for element in reader.elements() {
+        let element = element?;
+        // Keep the running maximum; a shallow trailing value must not overwrite a deeper one.
+        max_depth = max_depth.max(calculate_depth(&element, 0));
+    }
+    println!("The maximum depth is {}", max_depth);
+    Ok(())
+}
+
+/// Returns the depth of the deepest value nested inside `element`.
+///
+/// `depth` is the nesting level of `element` itself (0 for a top-level value). Values without
+/// children (scalars, empty containers, typed nulls such as `null.list`) report `depth`.
+fn calculate_depth(element: &Element, depth: usize) -> usize {
+    // `as_struct`/`as_sequence` yield `None` when there is nothing to iterate, so no `unwrap`.
+    let deepest_child = if element.ion_type() == IonType::Struct {
+        element.as_struct().and_then(|fields| {
+            fields
+                .iter()
+                .map(|(_field_name, child)| calculate_depth(child, depth + 1))
+                .max()
+        })
+    } else {
+        element.as_sequence().and_then(|children| {
+            children
+                .into_iter()
+                .map(|child| calculate_depth(child, depth + 1))
+                .max()
+        })
+    };
+    deepest_child.unwrap_or(depth)
+}
diff --git a/src/bin/ion/commands/beta/analyze/mod.rs b/src/bin/ion/commands/beta/analyze/mod.rs
new file mode 100644
index 00000000..765312fd
--- /dev/null
+++ b/src/bin/ion/commands/beta/analyze/mod.rs
@@ -0,0 +1,29 @@
+pub mod count;
+pub mod size;
+pub mod depth;
+
+use crate::commands::IonCliCommand;
+use crate::commands::beta::analyze::count::CountCommand;
+use crate::commands::beta::analyze::size::SizeCommand;
+use crate::commands::beta::analyze::depth::DepthCommand;
+
+/// Groups the stream-statistics subcommands (`count`, `size`, `depth`) under `beta analyze`.
+pub struct AnalyzeNamespace;
+
+impl IonCliCommand for AnalyzeNamespace {
+    fn name(&self) -> &'static str {
+        "analyze"
+    }
+
+    fn about(&self) -> &'static str {
+        "The 'analyze' command is a namespace for commands used for Ion stream statistical analysis."
+    }
+
+    fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
+        vec![
+            Box::new(CountCommand),
+            Box::new(SizeCommand),
+            Box::new(DepthCommand),
+        ]
+    }
+}
diff --git a/src/bin/ion/commands/beta/analyze/size.rs b/src/bin/ion/commands/beta/analyze/size.rs
new file mode 100644
index 00000000..790f47c7
--- /dev/null
+++ b/src/bin/ion/commands/beta/analyze/size.rs
@@ -0,0 +1,82 @@
+use std::fs::File;
+use crate::commands::{IonCliCommand, WithIonCliArgument};
+use anyhow::{bail, Context, Result};
+use clap::{ArgMatches, Command};
+use ion_rs::{IonReader, RawBinaryReader, SystemReader, SystemStreamItem};
+use memmap::MmapOptions;
+use lowcharts::plot;
+
+/// `beta analyze size`: plots the distribution of top-level value sizes in binary Ion files.
+pub struct SizeCommand;
+
+impl IonCliCommand for SizeCommand {
+    fn name(&self) -> &'static str {
+        "size"
+    }
+
+    fn about(&self) -> &'static str {
+        "Prints a histogram of the sizes (in bytes) of top-level values in the input stream."
+    }
+
+    fn configure_args(&self, command: Command) -> Command {
+        command.with_input()
+    }
+
+    fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
+        if let Some(input_file_names) = args.get_many::<String>("input") {
+            for input_file in input_file_names {
+                let file = File::open(input_file.as_str())
+                    .with_context(|| format!("Could not open file '{}'", &input_file))?;
+                // SAFETY: the mapping is only read, and `file` stays open for as long as it is used.
+                // NOTE(review): this still assumes nothing truncates or rewrites the file while it
+                // is mapped — confirm that is acceptable for a CLI tool.
+                let mmap = unsafe {
+                    MmapOptions::new()
+                        .map(&file)
+                        .with_context(|| format!("Could not mmap '{}'", input_file))?
+                };
+                // Treat the mmap as a byte array.
+                let ion_data: &[u8] = &mmap[..];
+                let raw_reader = RawBinaryReader::new(ion_data);
+                let mut system_reader = SystemReader::new(raw_reader);
+                // Propagate read failures instead of silently dropping the `Result`.
+                size_analyze(&mut system_reader)?;
+            }
+        } else {
+            bail!("this command does not yet support reading from STDIN")
+        }
+        Ok(())
+    }
+}
+
+/// Records the encoded size of every top-level user value and prints a histogram of them.
+///
+/// A value's size is its annotations length (when present) plus its header and value lengths.
+/// Prints a short notice instead of a chart when the stream holds no user values.
+fn size_analyze(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
+    let mut sizes: Vec<f64> = Vec::new();
+    loop {
+        match reader.next()? {
+            SystemStreamItem::Value(_) => {
+                let size = reader.annotations_length().unwrap_or(0)
+                    + reader.header_length()
+                    + reader.value_length();
+                sizes.push(size as f64);
+            }
+            SystemStreamItem::Nothing => break,
+            _ => {}
+        }
+    }
+    // NOTE(review): an empty sample set is assumed to be unsupported by the histogram
+    // builder, so report it explicitly rather than attempting to plot — confirm.
+    if sizes.is_empty() {
+        println!("No top-level values found");
+        return Ok(());
+    }
+    // Plot a histogram of the collected sizes, with 4 buckets and a precision
+    // chosen by the library.
+    let options = plot::HistogramOptions { intervals: 4, ..Default::default() };
+    let histogram = plot::Histogram::new(&sizes, options);
+    print!("{}", histogram);
+    Ok(())
+}
diff --git a/src/bin/ion/commands/beta/mod.rs b/src/bin/ion/commands/beta/mod.rs
index 86b82e1f..118d8b9f 100644
--- a/src/bin/ion/commands/beta/mod.rs
+++ b/src/bin/ion/commands/beta/mod.rs
@@ -1,4 +1,3 @@
-pub mod count;
 pub mod from;
 
 #[cfg(feature = "beta-subcommands")]
@@ -9,8 +8,9 @@ pub mod primitive;
 pub mod schema;
 pub mod symtab;
 pub mod to;
+pub mod analyze;
+
 
-use crate::commands::beta::count::CountCommand;
 use crate::commands::beta::from::FromNamespace;
 #[cfg(feature = "beta-subcommands")]
 use crate::commands::beta::generate::GenerateCommand;
@@ -18,6 +18,7 @@ use crate::commands::beta::head::HeadCommand;
 use crate::commands::beta::inspect::InspectCommand;
 use crate::commands::beta::primitive::PrimitiveCommand;
 use crate::commands::beta::schema::SchemaNamespace;
+use crate::commands::beta::analyze::AnalyzeNamespace;
 use crate::commands::beta::symtab::SymtabNamespace;
 use crate::commands::beta::to::ToNamespace;
 use crate::commands::IonCliCommand;
@@ -35,10 +36,10 @@ impl IonCliCommand for BetaNamespace {
 
     fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
         vec![
-            Box::new(CountCommand),
             Box::new(InspectCommand),
             Box::new(PrimitiveCommand),
             Box::new(SchemaNamespace),
+            Box::new(AnalyzeNamespace),
             Box::new(HeadCommand),
             Box::new(FromNamespace),
             Box::new(ToNamespace),
diff --git a/src/bin/ion/commands/beta/symtab/count.rs b/src/bin/ion/commands/beta/symtab/count.rs
new file mode 100644
index 00000000..dce9fd06
--- /dev/null
+++ b/src/bin/ion/commands/beta/symtab/count.rs
@@ -0,0 +1,65 @@
+use crate::commands::{IonCliCommand, WithIonCliArgument};
+use anyhow::{bail, Context, Result};
+use clap::{ArgMatches, Command};
+use ion_rs::*;
+use std::fs::File;
+use ion_rs::RawBinaryReader;
+use memmap::MmapOptions;
+
+/// `beta symtab count`: reports how many symbol tables a binary Ion stream contains.
+pub struct SymbolTableCommand;
+
+impl IonCliCommand for SymbolTableCommand {
+    fn name(&self) -> &'static str {
+        "count"
+    }
+
+    fn about(&self) -> &'static str {
+        "Prints the number of symbol tables."
+    }
+
+    fn configure_args(&self, command: Command) -> Command {
+        command.with_input()
+    }
+
+    fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
+        if let Some(input_file_names) = args.get_many::<String>("input") {
+            // Input files were specified, count the symbol tables in each of them in turn
+            for input_file in input_file_names {
+                let file = File::open(input_file.as_str())
+                    .with_context(|| format!("Could not open file '{}'", &input_file))?;
+                // SAFETY: the mapping is only read, and `file` stays open for as long as it is used.
+                // NOTE(review): this still assumes nothing truncates or rewrites the file while it
+                // is mapped — confirm that is acceptable for a CLI tool.
+                let mmap = unsafe {
+                    MmapOptions::new()
+                        .map(&file)
+                        .with_context(|| format!("Could not mmap '{}'", input_file))?
+                };
+                // Treat the mmap as a byte array.
+                let ion_data: &[u8] = &mmap[..];
+                let raw_reader = RawBinaryReader::new(ion_data);
+                let mut system_reader = SystemReader::new(raw_reader);
+                // Propagate read failures instead of silently dropping the `Result`.
+                symbol_tables(&mut system_reader)?;
+            }
+        } else {
+            bail!("this command does not yet support reading from STDIN")
+        }
+        Ok(())
+    }
+}
+
+/// Counts the struct-typed symbol table values that `reader` yields and prints the total.
+fn symbol_tables(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
+    let mut count = 0;
+    loop {
+        match reader.next()? {
+            SystemStreamItem::SymbolTableValue(IonType::Struct) => count += 1,
+            SystemStreamItem::Nothing => break,
+            _ => {}
+        }
+    }
+    println!("The number of symbol tables is {} ", count);
+    Ok(())
+}
diff --git a/src/bin/ion/commands/beta/symtab/mod.rs b/src/bin/ion/commands/beta/symtab/mod.rs
index 5d789e5c..c37deae7 100644
--- a/src/bin/ion/commands/beta/symtab/mod.rs
+++ b/src/bin/ion/commands/beta/symtab/mod.rs
@@ -1,7 +1,11 @@
+use crate::commands::beta::symtab::count::SymbolTableCommand;
 use crate::commands::beta::symtab::filter::SymtabFilterCommand;
+use crate::commands::beta::symtab::symbol_count::SymbolNumberCommand;
 use crate::commands::IonCliCommand;
 
 pub mod filter;
+pub mod count;
+pub mod symbol_count;
 
 pub struct SymtabNamespace;
 
@@ -15,6 +19,6 @@ impl IonCliCommand for SymtabNamespace {
     }
 
     fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
-        vec![Box::new(SymtabFilterCommand)]
+        vec![Box::new(SymtabFilterCommand), Box::new(SymbolTableCommand), Box::new(SymbolNumberCommand)]
     }
 }
diff --git a/src/bin/ion/commands/beta/symtab/symbol_count.rs b/src/bin/ion/commands/beta/symtab/symbol_count.rs
new file mode 100644
index 00000000..c969cc72
--- /dev/null
+++ b/src/bin/ion/commands/beta/symtab/symbol_count.rs
@@ -0,0 +1,74 @@
+use crate::commands::{IonCliCommand, WithIonCliArgument};
+use anyhow::{bail, Context, Result};
+use clap::{ArgMatches, Command};
+use ion_rs::*;
+use std::fs::{File};
+use memmap::MmapOptions;
+
+/// Entries excluded from the count; presumably `$0` plus the nine Ion 1.0 system symbols.
+const SYSTEM_SYMBOL_COUNT: usize = 10;
+
+/// `beta symtab symbol_count`: reports how many non-system symbols a binary Ion stream uses.
+pub struct SymbolNumberCommand;
+
+impl IonCliCommand for SymbolNumberCommand {
+    fn name(&self) -> &'static str {
+        "symbol_count"
+    }
+
+    fn about(&self) -> &'static str {
+        "Prints the number of symbols."
+    }
+
+    fn configure_args(&self, command: Command) -> Command {
+        command.with_input()
+    }
+
+    fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
+        if let Some(input_file_names) = args.get_many::<String>("input") {
+            // Input files were specified, count the symbols in each of them in turn
+            for input_file in input_file_names {
+                let file = File::open(input_file.as_str())
+                    .with_context(|| format!("Could not open file '{}'", &input_file))?;
+                // SAFETY: the mapping is only read, and `file` stays open for as long as it is used.
+                // NOTE(review): this still assumes nothing truncates or rewrites the file while it
+                // is mapped — confirm that is acceptable for a CLI tool.
+                let mmap = unsafe {
+                    MmapOptions::new()
+                        .map(&file)
+                        .with_context(|| format!("Could not mmap '{}'", input_file))?
+                };
+                // Treat the mmap as a byte array.
+                let ion_data: &[u8] = &mmap[..];
+                let raw_reader = RawBinaryReader::new(ion_data);
+                let mut system_reader = SystemReader::new(raw_reader);
+                // Propagate read failures instead of silently dropping the `Result`.
+                symtab_number(&mut system_reader)?;
+            }
+        } else {
+            bail!("this command does not yet support reading from STDIN")
+        }
+        Ok(())
+    }
+}
+
+/// Prints the number of non-system symbols in the symbol table seen while reading the stream.
+///
+/// NOTE(review): the active table's size is added once per top-level value, so a stream with
+/// several values sharing one table reports its symbols several times — confirm this is intended.
+fn symtab_number(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
+    let mut count = 0;
+    loop {
+        match reader.next()? {
+            SystemStreamItem::Value(_) => {
+                let symbols_len = reader.symbol_table().symbols().iter().len();
+                // Exclude the system symbols; saturate so a short table cannot underflow.
+                count += symbols_len.saturating_sub(SYSTEM_SYMBOL_COUNT);
+            }
+            SystemStreamItem::Nothing => break,
+            _ => {}
+        }
+    }
+    println!("The number of symbols is {}", count);
+    Ok(())
+}
diff --git a/tests/cli.rs b/tests/cli.rs
index 6dcf30ee..c0d483ce 100644
--- a/tests/cli.rs
+++ b/tests/cli.rs
@@ -6,6 +6,8 @@ use std::fs;
 use std::fs::File;
 use std::io::{Read, Write};
 use std::time::Duration;
+use ion_rs::{BinaryWriterBuilder, IonWriter};
+
 use tempfile::TempDir;
 
 enum FileMode {
@@ -218,6 +220,114 @@ fn test_write_all_values(#[case] number: i32, #[case] expected_output: &str) ->
     Ok(())
 }
 
+#[cfg(feature = "beta-subcommands")]
+#[rstest]
+#[case("{foo: bar, abc: [123, 456]}", "The maximum depth is 2")]
+///Calls `ion beta analyze depth` on a text Ion file. Passes if the reported maximum depth equals the expected value.
+fn test_analyze_depth(#[case] test_data: &str, #[case] expected_output: &str) -> Result<()> {
+    let mut cmd = Command::cargo_bin("ion")?;
+    let temp_dir = TempDir::new()?;
+    let input_path = temp_dir.path().join("test.ion");
+    let mut input_file = File::create(&input_path)?;
+    input_file.write_all(test_data.as_bytes())?;
+    input_file.flush()?;
+    cmd.args([
+        "beta",
+        "analyze",
+        "depth",
+        input_path.to_str().unwrap(),
+    ]);
+    let command_assert = cmd.assert();
+    let output = command_assert.get_output();
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert_eq!(stdout.trim_end(), expected_output);
+    Ok(())
+}
+
+#[cfg(feature = "beta-subcommands")]
+#[rstest]
+#[case("{foo: bar, abc: [123, 456]}", "Min size: 10 bytes\nMax size: 10 bytes\nMean size: 10 bytes")]
+///NOTE(review): `ion beta analyze size` prints a histogram, so this min/max/mean expectation looks stale — confirm.
+fn test_analyze_size(#[case] test_data: &str, #[case] expected_output: &str) -> Result<()> {
+    let mut cmd = Command::cargo_bin("ion")?;
+    let temp_dir = TempDir::new()?;
+    let input_path = temp_dir.path().join("test.10n");
+    let mut input_file = File::create(&input_path)?;
+    let mut writer = BinaryWriterBuilder::new().build(&mut input_file)?;
+    let test = Element::read_one(test_data)?;
+    test.write_to(&mut writer)?;
+    writer.flush()?;
+    input_file.flush()?;
+    cmd.args([
+        "beta",
+        "analyze",
+        "size",
+        input_path.to_str().unwrap(),
+    ]);
+    let command_assert = cmd.assert();
+    let output = command_assert.get_output();
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert_eq!(stdout.trim_end(), expected_output);
+    Ok(())
+}
+
+#[cfg(feature = "beta-subcommands")]
+#[rstest]
+#[case("[123, 456]", "The number of symbols is 0")]
+#[case("{foo: 123, abc: [123, 456]}", "The number of symbols is 2")]
+#[case("{foo: bar, abc: [123, 456]}", "The number of symbols is 3")]
+///Calls `ion beta symtab symbol_count` on a binary Ion file. Passes if the reported symbol count equals the expected value.
+fn test_symbol_count(#[case] test_data: &str, #[case] expected_out: &str) -> Result<()> {
+    let mut cmd = Command::cargo_bin("ion")?;
+    let temp_dir = TempDir::new()?;
+    let input_path = temp_dir.path().join("test.10n");
+    let mut input_file = File::create(&input_path)?;
+    let mut writer = BinaryWriterBuilder::new().build(&mut input_file)?;
+    let test = Element::read_one(test_data)?;
+    test.write_to(&mut writer)?;
+    writer.flush()?;
+    input_file.flush()?;
+    cmd.args([
+        "beta",
+        "symtab",
+        "symbol_count",
+        input_path.to_str().unwrap(),
+    ]);
+    let command_assert = cmd.assert();
+    let output = command_assert.get_output();
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert_eq!(stdout.trim_end(), expected_out);
+    Ok(())
+}
+
+#[cfg(feature = "beta-subcommands")]
+#[rstest]
+#[case("{foo: 123, abc: [123, 456]}", "The number of symbol tables is 2")]
+#[case("{foo: bar, abc: [123, 456]}", "The number of symbol tables is 2")]
+///Calls `ion beta symtab count` on a binary Ion file. NOTE(review): the expected table counts are unverified — confirm.
+fn test_symtab_count(#[case] test_data: &str, #[case] expected_out: &str) -> Result<()> {
+    let mut cmd = Command::cargo_bin("ion")?;
+    let temp_dir = TempDir::new()?;
+    let input_path = temp_dir.path().join("test.10n");
+    let mut input_file = File::create(&input_path)?;
+    let mut writer = BinaryWriterBuilder::new().build(&mut input_file)?;
+    let test = Element::read_one(test_data)?;
+    test.write_to(&mut writer)?;
+    writer.flush()?;
+    input_file.flush()?;
+    cmd.args([
+        "beta",
+        "symtab",
+        "count",
+        input_path.to_str().unwrap(),
+    ]);
+    let command_assert = cmd.assert();
+    let output = command_assert.get_output();
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert_eq!(stdout.trim_end(), expected_out);
+    Ok(())
+}
+
 #[cfg(feature = "beta-subcommands")]
 #[rstest]
 #[case(