forked from amazon-ion/ion-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add subcommands to support data stream analysis.
- Loading branch information
Showing
10 changed files
with
415 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use std::alloc::dealloc; | ||
use std::fmt::Error; | ||
use std::fs::File; | ||
use crate::commands::{IonCliCommand, WithIonCliArgument}; | ||
use anyhow::{bail, Context, Result}; | ||
use clap::{ArgMatches, Command}; | ||
use std::io::{stdin, BufReader, StdinLock}; | ||
use ion_rs::element::reader::ElementReader; | ||
use ion_rs::element::Element; | ||
use ion_rs::{IonReader, IonType, Reader, ReaderBuilder}; | ||
|
||
|
||
pub struct DepthCommand; | ||
|
||
impl IonCliCommand for DepthCommand { | ||
fn name(&self) -> &'static str { | ||
"depth" | ||
} | ||
|
||
fn about(&self) -> &'static str { | ||
"Prints the maximum depth of the input ion stream." | ||
} | ||
|
||
fn configure_args(&self, command: Command) -> Command { | ||
command.with_input() | ||
} | ||
|
||
fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> { | ||
if let Some(input_file_iter) = args.get_many::<String>("input") { | ||
for input_file in input_file_iter { | ||
let file = File::open(input_file) | ||
.with_context(|| format!("Could not open file '{}'", input_file))?; | ||
let mut reader = ReaderBuilder::new().build(file)?; | ||
get_depth(&mut reader)?; | ||
} | ||
} else { | ||
bail!("this command does not yet support reading from STDIN") | ||
}; | ||
Ok(()) | ||
} | ||
} | ||
|
||
fn get_depth(reader: &mut Reader) -> Result<()> { | ||
let mut max_depth = 0; | ||
for element in reader.elements(){ | ||
let unwrap_element = element.unwrap(); | ||
max_depth = calculate_depth(&unwrap_element, 0); | ||
} | ||
println!("The maximum depth is {}",max_depth); | ||
Ok(()) | ||
} | ||
|
||
fn calculate_depth(element: &Element, depth: usize) -> usize { | ||
return if element.ion_type().is_container() { | ||
if element.ion_type() == IonType::Struct { | ||
element.as_struct().unwrap().iter().map(|(_field_name,e)| calculate_depth(e, depth + 1)).max().unwrap_or(depth) | ||
} else { | ||
element.as_sequence().unwrap().into_iter().map(|e| calculate_depth(e, depth + 1)).max().unwrap_or(depth) | ||
} | ||
} else { | ||
depth | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
pub mod count; | ||
pub mod size; | ||
pub mod depth; | ||
|
||
|
||
use crate::commands::IonCliCommand; | ||
use crate::commands::beta::analyze::count::CountCommand; | ||
use crate::commands::beta::analyze::size::SizeCommand; | ||
use crate::commands::beta::analyze::depth::DepthCommand; | ||
|
||
|
||
pub struct AnalyzeNamespace; | ||
|
||
impl IonCliCommand for AnalyzeNamespace { | ||
fn name(&self) -> &'static str { | ||
"analyze" | ||
} | ||
|
||
fn about(&self) -> &'static str { | ||
"The 'analyze' command is a namespace for commands used for Ion stream statistical analysis." | ||
} | ||
fn subcommands(&self) -> Vec<Box<dyn IonCliCommand>> { | ||
vec![Box::new(CountCommand), Box::new(SizeCommand), Box::new(DepthCommand)] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
use std::fs::File; | ||
use crate::commands::{IonCliCommand, WithIonCliArgument}; | ||
use anyhow::{bail, Context, Result}; | ||
use clap::{ArgMatches, Command}; | ||
use ion_rs::binary::non_blocking::raw_binary_reader::RawBinaryReader; | ||
use ion_rs::{IonReader, SystemReader, SystemStreamItem}; | ||
use memmap::MmapOptions; | ||
use lowcharts::plot; | ||
|
||
|
||
pub struct SizeCommand; | ||
|
||
impl IonCliCommand for SizeCommand { | ||
fn name(&self) -> &'static str { | ||
"size" | ||
} | ||
|
||
fn about(&self) -> &'static str { | ||
"Prints the overall min, max, mean size of top-level values in the input stream." | ||
} | ||
|
||
fn configure_args(&self, command: Command) -> Command { | ||
command.with_input() | ||
} | ||
|
||
fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> { | ||
if let Some(input_file_names) = args.get_many::<String>("input") { | ||
for input_file in input_file_names { | ||
let file = File::open(input_file.as_str()) | ||
.with_context(|| format!("Could not open file '{}'", &input_file))?; | ||
let mmap = unsafe { | ||
MmapOptions::new() | ||
.map(&file) | ||
.with_context(|| format!("Could not mmap '{}'", input_file))? | ||
}; | ||
// Treat the mmap as a byte array. | ||
let ion_data: &[u8] = &mmap[..]; | ||
let raw_reader = RawBinaryReader::new(ion_data); | ||
let mut system_reader = SystemReader::new(raw_reader); | ||
size_analyze(&mut system_reader); | ||
} | ||
} else { | ||
bail!("this command does not yet support reading from STDIN") | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
fn size_analyze(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> { | ||
let mut vec: Vec<f64> = Vec::new(); | ||
loop { | ||
match reader.next()? { | ||
SystemStreamItem::Value(_) => { | ||
let mut size = 0; | ||
if reader.annotations_length() != None { | ||
size = reader.annotations_length().unwrap() + reader.header_length() + reader.value_length(); | ||
} else { | ||
size = reader.header_length() + reader.value_length(); | ||
} | ||
vec.push(size as f64); | ||
}, | ||
SystemStreamItem::Nothing => break, | ||
_ => {} | ||
} | ||
} | ||
// Plot a histogram of the above vector, with 4 buckets and a precision | ||
// chosen by library | ||
let options = plot::HistogramOptions { intervals: 4, ..Default::default() }; | ||
let histogram = plot::Histogram::new(&vec, options); | ||
print!("{}", histogram); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
use crate::commands::{IonCliCommand, WithIonCliArgument}; | ||
use anyhow::{bail, Context, Result}; | ||
use clap::{ArgMatches, Command}; | ||
use ion_rs::*; | ||
use std::fs::File; | ||
use std::io::{stdin, BufReader, StdinLock}; | ||
use ion_rs::binary::non_blocking::raw_binary_reader::RawBinaryReader; | ||
use ion_rs::StreamItem::Nothing; | ||
use memmap::MmapOptions; | ||
|
||
pub struct SymbolTableCommand; | ||
|
||
impl IonCliCommand for SymbolTableCommand { | ||
fn name(&self) -> &'static str { | ||
"count" | ||
} | ||
|
||
fn about(&self) -> &'static str { | ||
"Prints the number of symbol tables." | ||
} | ||
|
||
fn configure_args(&self, command: Command) -> Command { | ||
command.with_input() | ||
} | ||
|
||
fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> { | ||
if let Some(input_file_names) = args.get_many::<String>("input") { | ||
// Input files were specified, run the converter on each of them in turn | ||
for input_file in input_file_names { | ||
let file = File::open(input_file.as_str()) | ||
.with_context(|| format!("Could not open file '{}'", &input_file))?; | ||
let mmap = unsafe { | ||
MmapOptions::new() | ||
.map(&file) | ||
.with_context(|| format!("Could not mmap '{}'", input_file))? | ||
}; | ||
// Treat the mmap as a byte array. | ||
let ion_data: &[u8] = &mmap[..]; | ||
let raw_reader = RawBinaryReader::new(ion_data); | ||
let mut system_reader = SystemReader::new(raw_reader); | ||
symbol_tables(&mut system_reader); | ||
} | ||
} else { | ||
bail!("this command does not yet support reading from STDIN") | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
fn symbol_tables(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> { | ||
let mut count = 0; | ||
loop { | ||
match reader.next()? { | ||
SystemStreamItem::SymbolTableValue(IonType::Struct) => { | ||
count += 1; | ||
}, | ||
SystemStreamItem::Nothing => break, | ||
_ => {} | ||
} | ||
} | ||
println!("The number of symbol tables is {} ", count); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
use crate::commands::{IonCliCommand, WithIonCliArgument}; | ||
use anyhow::{bail, Context, Result}; | ||
use clap::{ArgMatches, Command}; | ||
use ion_rs::*; | ||
use std::fs::{File, read_to_string}; | ||
use std::io::{stdin, BufReader, StdinLock}; | ||
use std::ptr::null; | ||
use ion_rs::binary::non_blocking::raw_binary_reader::RawBinaryReader; | ||
use ion_rs::element::Element; | ||
use ion_rs::element::reader::ElementReader; | ||
use ion_rs::StreamItem::Nothing; | ||
use ion_rs::types::Struct; | ||
use ion_schema::isl::isl_constraint::IslConstraintValue::Annotations; | ||
use memmap::MmapOptions; | ||
|
||
|
||
pub struct SymbolNumberCommand; | ||
|
||
impl IonCliCommand for SymbolNumberCommand { | ||
fn name(&self) -> &'static str { | ||
"symbol_count" | ||
} | ||
|
||
fn about(&self) -> &'static str { | ||
"Prints the number of symbols." | ||
} | ||
|
||
fn configure_args(&self, command: Command) -> Command { | ||
command.with_input() | ||
} | ||
|
||
fn run(&self, _command_path: &mut Vec<String>, args: &ArgMatches) -> Result<()> { | ||
if let Some(input_file_names) = args.get_many::<String>("input") { | ||
// Input files were specified, run the converter on each of them in turn | ||
for input_file in input_file_names { | ||
let file = File::open(input_file.as_str()) | ||
.with_context(|| format!("Could not open file '{}'", &input_file))?; | ||
let mmap = unsafe { | ||
MmapOptions::new() | ||
.map(&file) | ||
.with_context(|| format!("Could not mmap '{}'", input_file))? | ||
}; | ||
// Treat the mmap as a byte array. | ||
let ion_data: &[u8] = &mmap[..]; | ||
let raw_reader = RawBinaryReader::new(ion_data); | ||
let mut system_reader = SystemReader::new(raw_reader); | ||
symtab_number(&mut system_reader); | ||
} | ||
} else { | ||
bail!("this command does not yet support reading from STDIN") | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
fn symtab_number(reader: &mut SystemReader<RawBinaryReader<&[u8]>>) -> Result<()> { | ||
let mut count = 0; | ||
loop { | ||
match reader.next()? { | ||
SystemStreamItem:: Value(_)=> { | ||
let symbols_len = reader.symbol_table().symbols().iter().len(); | ||
// Reduce the number of system symbols. | ||
count += symbols_len - 10; | ||
} | ||
SystemStreamItem::Nothing => break, | ||
_ => {} | ||
} | ||
} | ||
println!("The number of symbols is {}", count); | ||
Ok(()) | ||
} |
Oops, something went wrong.