Skip to content

Commit

Permalink
Make skipping spaces a command line option #229
Browse files Browse the repository at this point in the history
  • Loading branch information
krlawrence committed Jul 1, 2022
1 parent 5095b85 commit 75842bd
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
20 changes: 18 additions & 2 deletions csv-gremlin/csv-gremlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ class NeptuneCSVReader:

def __init__(self, vbatch=1, ebatch=1, java_dates=False, max_rows=sys.maxsize,
assume_utc=False, stop_on_error=True, silent_mode=False,
escape_dollar=False, show_summary=True, double_suffix=False):
escape_dollar=False, show_summary=True, double_suffix=False,
skip_spaces = False):

self.vertex_batch_size = vbatch
self.edge_batch_size = ebatch
Expand All @@ -96,6 +97,7 @@ def __init__(self, vbatch=1, ebatch=1, java_dates=False, max_rows=sys.maxsize,
self.edge_count = 0
self.property_count = 0
self.verbose_summary = False
self.skip_spaces = skip_spaces

def get_batch_sizes(self):
return {'vbatch': self.vertex_batch_size,
Expand Down Expand Up @@ -153,6 +155,12 @@ def set_double_suffix(self,suffix:bool):
def get_double_suffix(self):
    """Return the value currently stored in ``self.double_suffix``."""
    suffix_enabled = self.double_suffix
    return suffix_enabled

def set_skip_spaces(self, skip: bool):
    """Store ``skip`` as the reader's ``skip_spaces`` setting.

    The stored value is later handed to ``csv.DictReader`` as its
    ``skipinitialspace`` argument when a CSV file is processed.
    """
    self.skip_spaces = skip

def get_skip_spaces(self):
    """Return the value currently stored in ``self.skip_spaces``."""
    skip_enabled = self.skip_spaces
    return skip_enabled

def escape(self, string):
    """Return ``string`` with every double quote prefixed by a backslash."""
    return string.replace('"', '\\"')
Expand Down Expand Up @@ -451,7 +459,7 @@ def process_csv_file(self,fname):
self.property_count = 0
try:
with open(fname, newline='') as csvfile:
reader = csv.DictReader(csvfile, skipinitialspace=True, escapechar="\\")
reader = csv.DictReader(csvfile, skipinitialspace=self.skip_spaces, escapechar="\\")

if not '~id' in reader.fieldnames:
self.print_error('The header row must include an ~id column')
Expand Down Expand Up @@ -509,6 +517,13 @@ def process_csv_file(self,fname):
help='Suffix all floats and doubles with a "d" such as 12.34d. This is helpful\
when using the Gremlin Console or Groovy scripts as it will prevent\
floats and doubles automatically being created as BigDecimal objects.')
# Opt-in flag: leading spaces are only skipped when -skip_spaces is given.
# The help text is built from adjacent string literals rather than backslash
# continuations so that source indentation is not embedded in the message.
parser.add_argument('-skip_spaces', action='store_true',
                    help='Skip any leading spaces in each column. '
                         'By default this setting is False and any leading spaces '
                         'will be considered part of the column header or data value. '
                         'This setting does not apply to values enclosed in quotes '
                         'such as " abcd".',
                    default=False)
parser.add_argument('-escape_dollar', action='store_true',
help='For any dollar signs found convert them to an escaped\
form \$. This is needed if you are going to load the\
Expand All @@ -527,4 +542,5 @@ def process_csv_file(self,fname):
ncsv.set_escape_dollar(args.escape_dollar)
ncsv.set_double_suffix(args.double_suffix)
ncsv.set_show_summary(not args.no_summary)
ncsv.set_skip_spaces(args.skip_spaces)
ncsv.process_csv_file(args.csvfile)
2 changes: 1 addition & 1 deletion csv-gremlin/test-files/header-with-spaces.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
~id, ~label, type
~id, ~label, type
a1, animal, cat
a2, " animal ", cat
a3, animal , cat

0 comments on commit 75842bd

Please sign in to comment.