Skip to content

Commit

Permalink
Add subcommands to support data stream analysis.
Browse files Browse the repository at this point in the history
  • Loading branch information
linlin-s committed Feb 9, 2024
1 parent 6cf4431 commit 929d015
Show file tree
Hide file tree
Showing 10 changed files with 399 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ keywords = ["format", "parse", "encode"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lowcharts = "*"
anyhow = "1.0"
clap = { version = "4.0.17", features = ["cargo"] }
colored = "2.0.0"
Expand Down
File renamed without changes.
60 changes: 60 additions & 0 deletions src/bin/ion/commands/beta/analyze/depth.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use std::fs::File;
use crate::commands::{IonCliCommand, WithIonCliArgument};
use anyhow::{bail, Context, Result};
use clap::{ArgMatches, Command};
use ion_rs::ElementReader;
use ion_rs::Element;
use ion_rs::{IonReader, IonType, Reader, ReaderBuilder};

Check warning on line 7 in src/bin/ion/commands/beta/analyze/depth.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `IonReader`

Check warning on line 7 in src/bin/ion/commands/beta/analyze/depth.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused import: `IonReader`


pub struct DepthCommand;

impl IonCliCommand for DepthCommand {
fn name(&self) -> &'static str {
"depth"
}

fn about(&self) -> &'static str {
"Prints the maximum depth of the input ion stream."
}

fn configure_args(&self, command: Command) -> Command {
command.with_input()
}

fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
if let Some(input_file_iter) = args.get_many::<String>("input") {
for input_file in input_file_iter {
let file = File::open(input_file)
.with_context(|| format!("Could not open file '{}'", input_file))?;
let mut reader = ReaderBuilder::new().build(file)?;
get_depth(&mut reader)?;
}
} else {
bail!("this command does not yet support reading from STDIN")
};
Ok(())
}
}

fn get_depth(reader: &mut Reader) -> Result<()> {
let mut max_depth = 0;
for element in reader.elements(){
let unwrap_element = element.unwrap();
max_depth = calculate_depth(&unwrap_element, 0);
}
println!("The maximum depth is {}",max_depth);
Ok(())
}

fn calculate_depth(element: &Element, depth: usize) -> usize {
return if element.ion_type().is_container() {
if element.ion_type() == IonType::Struct {
element.as_struct().unwrap().iter().map(|(_field_name,e)| calculate_depth(e, depth + 1)).max().unwrap_or(depth)
} else {
element.as_sequence().unwrap().into_iter().map(|e| calculate_depth(e, depth + 1)).max().unwrap_or(depth)
}
} else {
depth
}
}
25 changes: 25 additions & 0 deletions src/bin/ion/commands/beta/analyze/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
pub mod count;
pub mod size;
pub mod depth;


use crate::commands::IonCliCommand;
use crate::commands::beta::analyze::count::CountCommand;
use crate::commands::beta::analyze::size::SizeCommand;
use crate::commands::beta::analyze::depth::DepthCommand;


pub struct AnalyzeNamespace;

impl IonCliCommand for AnalyzeNamespace {
fn name(&self) -> &'static str {
"analyze"
}

fn about(&self) -> &'static str {
"The 'analyze' command is a namespace for commands used for Ion stream statistical analysis."
}
fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
vec![Box::new(CountCommand), Box::new(SizeCommand), Box::new(DepthCommand)]
}
}
71 changes: 71 additions & 0 deletions src/bin/ion/commands/beta/analyze/size.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use std::fs::File;
use crate::commands::{IonCliCommand, WithIonCliArgument};
use anyhow::{bail, Context, Result};
use clap::{ArgMatches, Command};
use ion_rs::{IonReader, RawBinaryReader, SystemReader, SystemStreamItem};
use memmap::MmapOptions;
use lowcharts::plot;


pub struct SizeCommand;

impl IonCliCommand for SizeCommand {
fn name(&self) -> &'static str {
"size"
}

fn about(&self) -> &'static str {
"Prints the overall min, max, mean size of top-level values in the input stream."
}

fn configure_args(&self, command: Command) -> Command {
command.with_input()
}

fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
if let Some(input_file_names) = args.get_many::<String>("input") {
for input_file in input_file_names {
let file = File::open(input_file.as_str())
.with_context(|| format!("Could not open file '{}'", &input_file))?;
let mmap = unsafe {
MmapOptions::new()
.map(&file)
.with_context(|| format!("Could not mmap '{}'", input_file))?
};
// Treat the mmap as a byte array.
let ion_data: &[u8] = &mmap[..];
let raw_reader = RawBinaryReader::new(ion_data);
let mut system_reader = SystemReader::new(raw_reader);
size_analyze(&mut system_reader);

Check warning on line 39 in src/bin/ion/commands/beta/analyze/size.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used

Check warning on line 39 in src/bin/ion/commands/beta/analyze/size.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used
}
} else {
bail!("this command does not yet support reading from STDIN")
}
Ok(())
}
}

fn size_analyze(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
let mut vec: Vec<f64> = Vec::new();
loop {
match reader.next()? {
SystemStreamItem::Value(_) => {
let mut size = 0;

Check warning on line 53 in src/bin/ion/commands/beta/analyze/size.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

value assigned to `size` is never read

Check warning on line 53 in src/bin/ion/commands/beta/analyze/size.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

value assigned to `size` is never read
if reader.annotations_length() != None {
size = reader.annotations_length().unwrap() + reader.header_length() + reader.value_length();
} else {
size = reader.header_length() + reader.value_length();
}
vec.push(size as f64);
},
SystemStreamItem::Nothing => break,
_ => {}
}
}
// Plot a histogram of the above vector, with 4 buckets and a precision
// chosen by library
let options = plot::HistogramOptions { intervals: 4, ..Default::default() };
let histogram = plot::Histogram::new(&vec, options);
print!("{}", histogram);
Ok(())
}
7 changes: 4 additions & 3 deletions src/bin/ion/commands/beta/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
pub mod count;
pub mod from;

#[cfg(feature = "beta-subcommands")]
Expand All @@ -9,15 +8,17 @@ pub mod primitive;
pub mod schema;
pub mod symtab;
pub mod to;
pub mod analyze;


use crate::commands::beta::count::CountCommand;
use crate::commands::beta::from::FromNamespace;
#[cfg(feature = "beta-subcommands")]
use crate::commands::beta::generate::GenerateCommand;
use crate::commands::beta::head::HeadCommand;
use crate::commands::beta::inspect::InspectCommand;
use crate::commands::beta::primitive::PrimitiveCommand;
use crate::commands::beta::schema::SchemaNamespace;
use crate::commands::beta::analyze::AnalyzeNamespace;
use crate::commands::beta::symtab::SymtabNamespace;
use crate::commands::beta::to::ToNamespace;
use crate::commands::IonCliCommand;
Expand All @@ -35,10 +36,10 @@ impl IonCliCommand for BetaNamespace {

fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
vec![
Box::new(CountCommand),
Box::new(InspectCommand),
Box::new(PrimitiveCommand),
Box::new(SchemaNamespace),
Box::new(AnalyzeNamespace),
Box::new(HeadCommand),
Box::new(FromNamespace),
Box::new(ToNamespace),
Expand Down
61 changes: 61 additions & 0 deletions src/bin/ion/commands/beta/symtab/count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use crate::commands::{IonCliCommand, WithIonCliArgument};
use anyhow::{bail, Context, Result};
use clap::{ArgMatches, Command};
use ion_rs::*;
use std::fs::File;
use ion_rs::RawBinaryReader;
use memmap::MmapOptions;

pub struct SymbolTableCommand;

impl IonCliCommand for SymbolTableCommand {
fn name(&self) -> &'static str {
"count"
}

fn about(&self) -> &'static str {
"Prints the number of symbol tables."
}

fn configure_args(&self, command: Command) -> Command {
command.with_input()
}

fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
if let Some(input_file_names) = args.get_many::<String>("input") {
// Input files were specified, run the converter on each of them in turn
for input_file in input_file_names {
let file = File::open(input_file.as_str())
.with_context(|| format!("Could not open file '{}'", &input_file))?;
let mmap = unsafe {
MmapOptions::new()
.map(&file)
.with_context(|| format!("Could not mmap '{}'", input_file))?
};
// Treat the mmap as a byte array.
let ion_data: &[u8] = &mmap[..];
let raw_reader = RawBinaryReader::new(ion_data);
let mut system_reader = SystemReader::new(raw_reader);
symbol_tables(&mut system_reader);

Check warning on line 39 in src/bin/ion/commands/beta/symtab/count.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used

Check warning on line 39 in src/bin/ion/commands/beta/symtab/count.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used
}
} else {
bail!("this command does not yet support reading from STDIN")
}
Ok(())
}
}

fn symbol_tables(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
let mut count = 0;
loop {
match reader.next()? {
SystemStreamItem::SymbolTableValue(IonType::Struct) => {
count += 1;
},
SystemStreamItem::Nothing => break,
_ => {}
}
}
println!("The number of symbol tables is {} ", count);
Ok(())
}
6 changes: 5 additions & 1 deletion src/bin/ion/commands/beta/symtab/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use crate::commands::beta::symtab::count::SymbolTableCommand;
use crate::commands::beta::symtab::filter::SymtabFilterCommand;
use crate::commands::beta::symtab::symbol_count::SymbolNumberCommand;
use crate::commands::IonCliCommand;

pub mod filter;
pub mod count;
pub mod symbol_count;

pub struct SymtabNamespace;

Expand All @@ -15,6 +19,6 @@ impl IonCliCommand for SymtabNamespace {
}

fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> {
vec![Box::new(SymtabFilterCommand)]
vec![Box::new(SymtabFilterCommand), Box::new(SymbolTableCommand), Box::new(SymbolNumberCommand)]
}
}
63 changes: 63 additions & 0 deletions src/bin/ion/commands/beta/symtab/symbol_count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use crate::commands::{IonCliCommand, WithIonCliArgument};
use anyhow::{bail, Context, Result};
use clap::{ArgMatches, Command};
use ion_rs::*;
use std::fs::{File};
use memmap::MmapOptions;


pub struct SymbolNumberCommand;

impl IonCliCommand for SymbolNumberCommand {
fn name(&self) -> &'static str {
"symbol_count"
}

fn about(&self) -> &'static str {
"Prints the number of symbols."
}

fn configure_args(&self, command: Command) -> Command {
command.with_input()
}

fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> {
if let Some(input_file_names) = args.get_many::<String>("input") {
// Input files were specified, run the converter on each of them in turn
for input_file in input_file_names {
let file = File::open(input_file.as_str())
.with_context(|| format!("Could not open file '{}'", &input_file))?;
let mmap = unsafe {
MmapOptions::new()
.map(&file)
.with_context(|| format!("Could not mmap '{}'", input_file))?
};
// Treat the mmap as a byte array.
let ion_data: &[u8] = &mmap[..];
let raw_reader = RawBinaryReader::new(ion_data);
let mut system_reader = SystemReader::new(raw_reader);
symtab_number(&mut system_reader);

Check warning on line 39 in src/bin/ion/commands/beta/symtab/symbol_count.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used

Check warning on line 39 in src/bin/ion/commands/beta/symtab/symbol_count.rs

View workflow job for this annotation

GitHub Actions / Build and Test (ubuntu-latest)

unused `Result` that must be used
}
} else {
bail!("this command does not yet support reading from STDIN")
}
Ok(())
}
}

fn symtab_number(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> {
let mut count = 0;
loop {
match reader.next()? {
SystemStreamItem:: Value(_)=> {
let symbols_len = reader.symbol_table().symbols().iter().len();
// Reduce the number of system symbols.
count += symbols_len - 10;
}
SystemStreamItem::Nothing => break,
_ => {}
}
}
println!("The number of symbols is {}", count);
Ok(())
}
Loading

0 comments on commit 929d015

Please sign in to comment.