diff --git a/jello/cli.py b/jello/cli.py index f496121..4aec4d2 100644 --- a/jello/cli.py +++ b/jello/cli.py @@ -1,5 +1,7 @@ """jello - query JSON at the command line with python syntax""" +import contextlib +import json import os import sys import signal @@ -8,6 +10,7 @@ import traceback from textwrap import TextWrapper import jello +import jello.lib as lib from jello.lib import opts, load_json, read_file, pyquery, format_response @@ -21,7 +24,7 @@ def get_stdin(): if sys.stdin.isatty(): return None else: - return sys.stdin.read() + return sys.stdin def print_help(): @@ -45,6 +48,8 @@ def print_help(): -r raw string output (no quotes) -R raw string input (don't auto convert input to dict/list) -s print the JSON schema in grep-able format + -S stream input newline-delimited json from STDIN or file + from -f. "_" is an iterator providing the entries -t print type annotations in schema view -v version info -h help @@ -126,6 +131,27 @@ def print_exception(e=None, data='', query='', response='', ex_type='Runtime'): sys.exit(1) +def print_json_streaming_error(e, query='', ex_type='Json Load'): + cause = e.__cause__ + data = None + if isinstance(cause, json.JSONDecodeError): + data = cause.doc + print_exception( + cause, + data=data, + query=query, + ex_type=ex_type + ) + + +class nullclosing: + """simpler version of contextlib.nullcontext, which was introduced in 3.7 + (currently support 3.6)""" + + def close(self): + pass + + def main(data=None, query='_'): # break on ctrl-c keyboard interrupt signal.signal(signal.SIGINT, ctrlc) @@ -140,12 +166,14 @@ def main(data=None, query='_'): if sys.platform.startswith('win32'): os.system('') + stdin = None if data is None: - data = get_stdin() + stdin = get_stdin() options = [] long_options = {} arg_section = '' # can be query_file or data_files + data_files = [] for arg in sys.argv[1:]: if arg == '-q': @@ -164,10 +192,8 @@ def main(data=None, query='_'): arg_section = '' elif arg_section == 'data_files': - try: - data += '\n' + 
read_file(arg) - except Exception as e: - print_error(f'jello: Issue reading data file: {e}') + data_files.append(arg) + arg_section = '' elif arg.startswith('-') and not arg.startswith('--'): options.extend(arg[1:]) @@ -196,6 +222,7 @@ def main(data=None, query='_'): opts.raw = opts.raw or 'r' in options opts.raw_input = opts.raw_input or 'R' in options opts.schema = opts.schema or 's' in options + opts.stream_input = opts.stream_input or 'S' in options opts.types = opts.types or 't' in options opts.version_info = opts.version_info or 'v' in options opts.helpme = opts.helpme or 'h' in options @@ -213,39 +240,52 @@ def main(data=None, query='_'): ''')) sys.exit() - if data is None and not opts.empty: + if not opts.empty and data is None and not stdin and not data_files: print_error('jello: Missing JSON or JSON Lines data via STDIN or file via -f option.\n') + # read all the file sources + input_context_manager = contextlib.closing(nullclosing()) if opts.empty: data = '{}' - - # load the data as a raw string or JSON - if opts.raw_input: - data = str(data).rstrip('\r\n') - + elif opts.stream_input: + data = lib.StreamingJsonInput(data, stdin, data_files) + input_context_manager = data else: - # load the JSON or JSON Lines into a dict or list of dicts + data = lib.read_data_nonstreaming(data, stdin, data_files) + if opts.raw_input: + data = str(data).rstrip('\r\n') + else: + try: + data = load_json(data) + except json.JSONDecodeError as e: + print_exception(e, ex_type='JSON Load') + + # closes input data resources + with input_context_manager as _: + # Read .jelloconf.py (if it exists) and run the query + response = '' try: - data = load_json(data) + response = pyquery(data, query) + except lib.StreamingJsonError as e: + # when streaming input errors are not raised until the data is + # pulled by the user query + print_json_streaming_error(e) except Exception as e: - print_exception(e, ex_type='JSON Load') - - # Read .jelloconf.py (if it exists) and run the query - 
response = '' - try: - response = pyquery(data, query) - except Exception as e: - print_exception(e, data, query, ex_type='Query') + print_exception(e, data, query, ex_type='Query') - # reset opts.mono after pyquery since initialization in pyquery can change values - if opts.force_color: - opts.mono = False + # reset opts.mono after pyquery since initialization in pyquery can change values + if opts.force_color: + opts.mono = False - # Create and print schema or JSON/JSON-Lines/Lines - try: - format_response(response) - except Exception as e: - print_exception(e, data, query, response, ex_type='Output') + # Create and print schema or JSON/JSON-Lines/Lines + try: + format_response(response) + except lib.StreamingJsonError as e: + # when streaming output using -F we don't parse input and process it + # until we pull from the output iterator here + print_json_streaming_error(e) + except Exception as e: + print_exception(e, data, query, response, ex_type='Output') if __name__ == '__main__': diff --git a/jello/lib.py b/jello/lib.py index 635cbac..49eb47a 100644 --- a/jello/lib.py +++ b/jello/lib.py @@ -1,6 +1,7 @@ """jello - query JSON at the command line with python syntax""" import collections.abc +from io import StringIO import os import sys import types @@ -59,6 +60,7 @@ class opts: number_color = None string_color = None flatten = None + stream_input = None class JelloTheme: @@ -440,6 +442,172 @@ def read_file(file_path): return f.read() +def read_data_nonstreaming(initial_data, stdin, data_files): + sio = StringIO() + sep = "" + if initial_data is not None: + sio.write(initial_data) + sio.write("\n") + sep = "\n" + if stdin: + sio.write(sep) + sio.write(stdin.read()) + sep = "\n" + for file in data_files: + sio.write(sep) + # let the JsonDecoderError raise + sio.write(read_file(file)) + sep = "\n" + return sio.getvalue() + + +class StreamingJsonError(Exception): + ''' + Wraps exceptions raised while loading and parsing json data. 
+ Raised from exception so that __cause__ provides the underlying error. + + When streaming, data is not deserialized until pulled from an iterator during + user query execution or output formatting (when using -F to flatten and + stream the output). One cannot rely on where an exception is caught to + indicate what went wrong. This class signifies that an exception occurred + during reading or deserializing input data even when the exception + propagates from later function calls. + ''' + + +class CloseableIterator(collections.abc.Iterator): + ''' + Iterator that also provides a close() method. + Provides for safe file closing when reading from files within + iterators/generators, where the scope cannot be controlled to use a context + manager. + ''' + closer = None + it = None + + def __init__(self, closer, it): + self.closer = closer + self.it = it + + def close(self): + self.closer() + + def __iter__(self): + return self + + def __next__(self): + return next(self.it) + + +def _generate_json_from_lines_iter(lines_iter): + """ + Returns iterator of json objects from newline-delimited json input iterable. + lines_iter is any iterable whose iterator returns strings of individual json + objects. For example, a list of strings or a file-like object of ndjson. + """ + + # the set of exceptions file readline() may throw is not documented. + # separating this apart to isolate exceptions arising from reading an + # underlying iterator and file. + it = iter(lines_iter) + while True: + try: + line = next(it) + except StopIteration: + return + except Exception as e: + raise StreamingJsonError from e + + stripped = line.strip() + if not stripped: + continue + + try: + yield json.loads(stripped) + except json.JSONDecodeError as e: + raise StreamingJsonError from e + + +class StreamingJsonInput: + """ + Iterator and context manager for streaming json from stdin and files. 
+ """ + # these are "closeable iterators" when backed by a closeable file + current_iterator = None + # functions to construct remaining closeable iterators + remaining_iterator_factories = None + + def __init__(self, initial_data, stdin, files): + """ + initial_data. String from cli.main. + stdin, files. sys.stdin and data file paths. + """ + # set up the iterators + self.remaining_iterator_factories = collections.deque() + if initial_data: + self.remaining_iterator_factories.append( + lambda: CloseableIterator( + lambda: None, + _generate_json_from_lines_iter(initial_data.splitlines()) + ) + ) + + if stdin: + self.remaining_iterator_factories.append( + lambda f=stdin: CloseableIterator( + # don't close stdin + lambda: None, + _generate_json_from_lines_iter(f) + ), + ) + + for file in files: + def create_file_iterator(f=file): + # the file must live beyond this function call. + # StreamingJsonInput closes it as a context manager via + # CloseableIterator + # pylint: disable-next=R1732:consider-using-with + try: + opened_file = open(f, 'r') + except OSError as e: + raise StreamingJsonError from e + return CloseableIterator( + lambda f2=opened_file: f2.close(), + _generate_json_from_lines_iter(opened_file) + ) + self.remaining_iterator_factories.append(create_file_iterator) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.current_iterator: + self.current_iterator.close() + + def __iter__(self): + return self + + def __next__(self): + """ + Returns the next json object. + Raises StreamingJsonError on any deserialization error, with + __cause__ as the original Exception. 
+ """ + while True: + if self.current_iterator is None: + if not self.remaining_iterator_factories: + raise StopIteration + factory = self.remaining_iterator_factories.popleft() + self.current_iterator = factory() + + try: + return next(self.current_iterator) + except StopIteration: + self.current_iterator.close() + self.current_iterator = None + + def _compile_query(query): """ Compile the provided python code block into a function to transform json. diff --git a/tests/test_main.py b/tests/test_main.py index b59ba73..51e9009 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -35,6 +35,7 @@ def setUp(self): opts.number_color = None opts.string_color = None opts.flatten = None + opts.stream_input = None self.jc_a_output = '''{"name": "jc", "version": "1.9.3", "description": "jc cli output JSON conversion tool", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "parser_count": 50, "parsers": [{"name": "airport", "argument": "--airport", "version": "1.0", "description": "airport -I command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["darwin"], "magic_commands": ["airport -I"]}, {"name": "airport_s", "argument": "--airport-s", "version": "1.0", "description": "airport -s command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["darwin"], "magic_commands": ["airport -s"]}, {"name": "arp", "argument": "--arp", "version": "1.2", "description": "arp command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "aix", "freebsd", "darwin"], "magic_commands": ["arp"]}, {"name": "blkid", "argument": "--blkid", "version": "1.0", "description": "blkid command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["blkid"]}, {"name": "crontab", "argument": "--crontab", "version": "1.1", "description": "crontab command and file parser", 
"author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"], "magic_commands": ["crontab"]}, {"name": "crontab_u", "argument": "--crontab-u", "version": "1.0", "description": "crontab file parser with user support", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"]}, {"name": "csv", "argument": "--csv", "version": "1.0", "description": "CSV file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Using the python standard csv library", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"]}, {"name": "df", "argument": "--df", "version": "1.1", "description": "df command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin"], "magic_commands": ["df"]}, {"name": "dig", "argument": "--dig", "version": "1.1", "description": "dig command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "aix", "freebsd", "darwin"], "magic_commands": ["dig"]}, {"name": "du", "argument": "--du", "version": "1.1", "description": "du command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"], "magic_commands": ["du"]}, {"name": "env", "argument": "--env", "version": "1.1", "description": "env command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"], "magic_commands": ["env"]}, {"name": "file", "argument": "--file", "version": "1.1", "description": "file command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "aix", "freebsd", "darwin"], "magic_commands": ["file"]}, {"name": "free", "argument": "--free", "version": "1.0", "description": "free command parser", 
"author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["free"]}, {"name": "fstab", "argument": "--fstab", "version": "1.0", "description": "fstab file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"]}, {"name": "group", "argument": "--group", "version": "1.0", "description": "/etc/group file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"]}, {"name": "gshadow", "argument": "--gshadow", "version": "1.0", "description": "/etc/gshadow file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "aix", "freebsd"]}, {"name": "history", "argument": "--history", "version": "1.2", "description": "history command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Optimizations by https://github.com/philippeitis", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"]}, {"name": "hosts", "argument": "--hosts", "version": "1.0", "description": "/etc/hosts file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"]}, {"name": "id", "argument": "--id", "version": "1.0", "description": "id command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"], "magic_commands": ["id"]}, {"name": "ifconfig", "argument": "--ifconfig", "version": "1.5", "description": "ifconfig command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Using ifconfig-parser package from https://github.com/KnightWhoSayNi/ifconfig-parser", "compatible": ["linux", "aix", "freebsd", "darwin"], "magic_commands": ["ifconfig"]}, {"name": "ini", "argument": "--ini", "version": "1.0", "description": "INI file parser", 
"author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Using configparser from the standard library", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"]}, {"name": "iptables", "argument": "--iptables", "version": "1.1", "description": "iptables command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["iptables"]}, {"name": "jobs", "argument": "--jobs", "version": "1.0", "description": "jobs command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["jobs"]}, {"name": "last", "argument": "--last", "version": "1.0", "description": "last and lastb command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"], "magic_commands": ["last", "lastb"]}, {"name": "ls", "argument": "--ls", "version": "1.3", "description": "ls command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["ls"]}, {"name": "lsblk", "argument": "--lsblk", "version": "1.3", "description": "lsblk command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["lsblk"]}, {"name": "lsmod", "argument": "--lsmod", "version": "1.1", "description": "lsmod command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["lsmod"]}, {"name": "lsof", "argument": "--lsof", "version": "1.0", "description": "lsof command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["lsof"]}, {"name": "mount", "argument": "--mount", "version": "1.1", "description": "mount command parser", "author": "Kelly Brazil", 
"author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin"], "magic_commands": ["mount"]}, {"name": "netstat", "argument": "--netstat", "version": "1.2", "description": "netstat command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["netstat"]}, {"name": "ntpq", "argument": "--ntpq", "version": "1.0", "description": "ntpq -p command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["ntpq"]}, {"name": "passwd", "argument": "--passwd", "version": "1.0", "description": "/etc/passwd file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"]}, {"name": "pip_list", "argument": "--pip-list", "version": "1.0", "description": "pip list command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"], "magic_commands": ["pip list", "pip3 list"]}, {"name": "pip_show", "argument": "--pip-show", "version": "1.0", "description": "pip show command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"], "magic_commands": ["pip show", "pip3 show"]}, {"name": "ps", "argument": "--ps", "version": "1.1", "description": "ps command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["ps"]}, {"name": "route", "argument": "--route", "version": "1.0", "description": "route command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["route"]}, {"name": "shadow", "argument": "--shadow", "version": "1.0", "description": "/etc/shadow file parser", "author": "Kelly Brazil", "author_email": 
"kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "aix", "freebsd"]}, {"name": "ss", "argument": "--ss", "version": "1.0", "description": "ss command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["ss"]}, {"name": "stat", "argument": "--stat", "version": "1.0", "description": "stat command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["stat"]}, {"name": "systemctl", "argument": "--systemctl", "version": "1.0", "description": "systemctl command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["systemctl"]}, {"name": "systemctl_lj", "argument": "--systemctl-lj", "version": "1.0", "description": "systemctl list-jobs command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["systemctl list-jobs"]}, {"name": "systemctl_ls", "argument": "--systemctl-ls", "version": "1.0", "description": "systemctl list-sockets command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["systemctl list-sockets"]}, {"name": "systemctl_luf", "argument": "--systemctl-luf", "version": "1.0", "description": "systemctl list-unit-files command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["systemctl list-unit-files"]}, {"name": "timedatectl", "argument": "--timedatectl", "version": "1.0", "description": "timedatectl status command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux"], "magic_commands": ["timedatectl", "timedatectl status"]}, {"name": "uname", "argument": "--uname", "version": "1.1", "description": "uname -a command parser", "author": "Kelly Brazil", "author_email": 
"kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin"], "magic_commands": ["uname"]}, {"name": "uptime", "argument": "--uptime", "version": "1.0", "description": "uptime command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["uptime"]}, {"name": "w", "argument": "--w", "version": "1.0", "description": "w command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["w"]}, {"name": "who", "argument": "--who", "version": "1.0", "description": "who command parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "compatible": ["linux", "darwin", "cygwin", "aix", "freebsd"], "magic_commands": ["who"]}, {"name": "xml", "argument": "--xml", "version": "1.0", "description": "XML file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Using the xmltodict library at https://github.com/martinblech/xmltodict", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"]}, {"name": "yaml", "argument": "--yaml", "version": "1.0", "description": "YAML file parser", "author": "Kelly Brazil", "author_email": "kellyjonbrazil@gmail.com", "details": "Using the ruamel.yaml library at https://pypi.org/project/ruamel.yaml", "compatible": ["linux", "darwin", "cygwin", "win32", "aix", "freebsd"]}]}''' @@ -3785,7 +3786,7 @@ def test_data_file(self): # patch read_file function to mock initialization file old_read_file = copy.copy(jello.cli.read_file) - jello.cli.read_file = lambda x: '''{"a": "hello world"}''' + jello.lib.read_file = lambda x: '''{"a": "hello world"}''' sample = '' expected = '"hello world"\n' @@ -3796,7 +3797,7 @@ def test_data_file(self): with patch.object(sys, 'argv', testargs): _ = jello.cli.main(data=sample) - jello.cli.read_file = old_read_file + jello.lib.read_file = 
old_read_file self.assertEqual(f.getvalue(), expected) @@ -3927,5 +3928,34 @@ def test_flatten_error_not_iterator(self): self.assertEqual(context.args[0], err_expected) + def test_stream_input(self): + sample = '''\ +{"k":1,"v":10} +{"k":2,"v":20} +''' + query = '''\ +_ +''' + expected = '''\ +[ + { + "k": 1, + "v": 10 + }, + { + "k": 2, + "v": 20 + } +] +''' + f = io.StringIO() + with contextlib.redirect_stdout(f): + testargs = ['jello', '-S', query] + with patch.object(sys, 'argv', testargs): + _ = jello.cli.main(data=sample) + + self.assertEqual(f.getvalue(), expected) + + if __name__ == '__main__': unittest.main()