-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcybernethunter_cli.py
349 lines (268 loc) · 13 KB
/
cybernethunter_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/env python3
'''
NAME: cybernethunter.py | version: 0.1
CYBERNETHUNTER Version: 0.2
AUTHOR: Diego Perez (@darkquasar) - 2018
DESCRIPTION: Main module that controls the behaviour of the CYBERNETHUNTER Hunting and IR framework
Updates:
v0.1: ---.
ToDo:
1. Split the "output" argument into two different ones: output_type (xml, json, csv, etc.) and output_pipe (kafka, rabbitmq, stdout, etc.)
'''
import argparse
import importlib
import logging
import os
import sys
import time
from datetime import datetime as datetime
from pathlib import Path
from time import strftime
from streamz import Stream
from cybernethunter.helpermods import utils
from cybernethunter.helpermods import transforms
from cybernethunter.outputmods import output as cyout
from cybernethunter.parsermods import xml_parser as cyxml
from cybernethunter.parsermods import csv_parser as cycsv
class Arguments():
    """Command-line argument container for the CYBERNETHUNTER CLI.

    Builds the argparse parser and parses the supplied argv-style list
    (``args[0]`` is expected to be the program name, as in ``sys.argv``).
    """

    def __init__(self, args):
        self.parser = argparse.ArgumentParser(
            description="CYBERNETHUNTER DFIR Framework"
        )

        self.parser.add_argument(
            "-a", "--action",
            help="This option determines what action will be executed by CYBERNETHUNTER: parse logs, collect logs, hunt (runs a particular data analysis mod) or learn (ML)",
            type=str,
            choices=["collect", "hunt", "learn", "parse"],
            default="parse",
            required=False
        )

        self.parser.add_argument(
            "-f", "--file",
            help="File or folder (the script will list all files within it) to be processed",
            type=str,
            required=True
        )

        self.parser.add_argument(
            "-ht", "--hunt-template",
            help="Select the hunting template (YAML format) that will be applied to your data",
            type=str,
            default=None,
            required=False
        )

        # BUG FIX: the original help strings used doubled quotes ("" ... "")
        # which Python concatenates into nothing, so the intended quotation
        # marks never appeared in --help output. Single-quoted strings with
        # literal double quotes restore the intended text.
        self.parser.add_argument(
            "-kb", "--kafka-broker",
            help='Define the kafka broker options separated by a space as follows: IP PORT TOPIC. Example: "127.0.0.1 9092 winlogbeat"',
            type=str,
            default="127.0.0.1 9092 logstash",
            required=False
        )

        self.parser.add_argument(
            "-l", "--log-type",
            help='This option specifies the type of log being ingested. Type "xml" requires a file in XML format with proper wrapping (opening and closing top-level root node). Type "csv" requires a csv file in ASCII format.',
            type=str,
            choices=["xml", "csv"],
            default="xml",
            required=False
        )

        self.parser.add_argument(
            "-m", "--module",
            help="Use a module to perform ETL operations on target files",
            type=str,
            choices=["standard_parser", "xml_parser", "csv_parser", "dns_debug_logs_parser", "evtx_parser"],
            default="standard_parser",
            required=False
        )

        self.parser.add_argument(
            "-of", "--output-file",
            help="Name for the output file if this output pipe is selected",
            type=str,
            default=None,
            required=False
        )

        self.parser.add_argument(
            "-op", "--output-pipe",
            help="Pipe of output: stdout, file, kafka, rabbitmq, elasticsearch",
            type=str,
            choices=["stdout", "file", "rabbitmq", "kafka", "elasticsearch"],
            default="stdout",
            required=False
        )

        self.parser.add_argument(
            "-ot", "--output-type",
            help="Type of output: csv, tsv, json, json_pretty, sqlite",
            type=str,
            choices=["tsv", "csv", "json", "json_pretty", "sqlite"],
            default="json",
            required=False
        )

        self.parser.add_argument(
            "-rb", "--rabbitmq-broker",
            help='Define the rabbit-mq broker options separated by a space as follows: "IP PORT". Example: "127.0.0.1 9501"',
            type=str,
            default="127.0.0.1 9501",
            required=False
        )

        # NOTE(review): this default looks like a copy-paste of the broker
        # address, not credentials ("user password") — kept unchanged for
        # backward compatibility, but it should probably be corrected upstream.
        self.parser.add_argument(
            "-rc", "--rabbitmq-credentials",
            help='Define the rabbit-mq broker credentials separated by a space as follows: "user password". Example: "admin P@ssword123"',
            type=str,
            default="127.0.0.1 9501",
            required=False
        )

        self.parser.add_argument(
            "-x", "--xmlparsetype",
            help='This option determines how the target XML file is parsed. When "flat" is selected, the XML will be converted to a flat json. When "nested" is selected, the XML will be converted to a nested json resembling the structure of the original XML. If two or more elements within the nested dictionary are equal, they will be embedded within a list.',
            type=str,
            choices=["nested", "flat"],
            default="flat",
            required=False
        )

        # BUG FIX: the original ignored "args" entirely and always parsed
        # sys.argv, which made the class untestable and the parameter
        # misleading. Honoring args keeps main()'s Arguments(sys.argv) call
        # behaving identically (args[1:] == sys.argv[1:]) while allowing a
        # synthetic argv list to be supplied. An empty/None sequence falls
        # back to sys.argv via argparse's default behavior.
        self.pargs = self.parser.parse_args(args[1:] if args else None)

    def get_args(self):
        """Return the parsed argparse.Namespace."""
        return self.pargs
class cyh_helpers:
    """Shared helper facade for the CLI: logging, output-pipe management and
    target-file discovery."""

    def __init__(self):
        # Setup logging and the shared helper/transform mods
        self.utilities = utils.HelperMod()
        self.transforms = transforms.HelperMod()
        self.logger = self.utilities.get_logger('CYBERNETHUNTER')

    def init_output_pipe(self, output_pipe, output_type, output_file=None, log_type=None, kafka_broker=None, rabbitmq_broker=None, rabbitmq_credentials=None):
        """Initialize the output pipe for records produced by the parsermods.

        Broker arguments arrive as space-separated strings ("IP PORT [TOPIC]"
        / "user password") and are split into lists before being handed to
        cyout.Output.
        """
        # BUG FIX: the original called .split() unconditionally, so the
        # documented None defaults raised AttributeError; guard with an empty
        # string so a missing broker yields an empty list instead.
        self.kafka_broker = (kafka_broker or "").split(" ")
        self.rabbitmq_broker = (rabbitmq_broker or "").split(" ")
        self.rabbitmq_credentials = (rabbitmq_credentials or "").split(" ")
        self.output_pipe = cyout.Output(output_pipe=output_pipe, output_type=output_type, output_file=output_file, log_type=log_type, kafka_broker=self.kafka_broker, rabbitmq_broker=self.rabbitmq_broker, rabbitmq_credentials=self.rabbitmq_credentials)

    def send_to_output_pipe(self, data, use_streamz=False):
        """Iterate over a record generator and send each record through the
        output pipe, closing the pipe when the generator is exhausted.

        When use_streamz is True the records are routed through a streamz
        Stream pipeline instead of a plain loop.
        """
        self.logger.info('Running records through output pipe')
        print('\n')

        try:
            if use_streamz:
                # Setup Stream Pipeline: csv/tsv records are converted from
                # json before being sunk into the output pipe.
                source_pipe = Stream()
                if self.output_pipe.output_type in ('csv', 'tsv'):
                    source_pipe.map(self.transforms.convert_json_record, to_type=self.output_pipe.output_type).sink(self.output_pipe.send)
                else:
                    source_pipe.sink(self.output_pipe.send)

                for record in data:
                    if record is None:
                        continue
                    source_pipe.emit(record)
            else:
                for record in data:
                    if record is None:
                        continue
                    # Only the stdout pipe needs json -> csv/tsv conversion here
                    if self.output_pipe.output_pipe == 'stdout' and self.output_pipe.output_type in ('csv', 'tsv'):
                        record = self.transforms.convert_json_record(record, to_type=self.output_pipe.output_type)
                    self.output_pipe.send(record)
        finally:
            self.output_pipe.close_output_pipe()

    def list_targetfiles(self, pargs):
        """Resolve pargs.file into a list of targets: a URL string, a single
        file Path, or the matching files inside a directory."""
        # Return the normalized URL string if '--file' looks like a URL
        if '//' in pargs.file:
            url_string = self.transforms.normalize_url(pargs.file, return_string=True)
            return [url_string]

        file_path = Path(pargs.file)

        # A single (non-directory) path is returned as-is
        if not file_path.is_dir():
            return [file_path]

        # Directory: only keep files whose suffix matches the selected log
        # type so the list does not need cleaning later on.
        suffix_by_logtype = {"csv": ".csv", "xml": ".xml"}
        file_type_filter = suffix_by_logtype.get(pargs.log_type, "")

        try:
            return [f for f in file_path.iterdir() if f.is_file() and file_type_filter in f.suffix]
        except FileNotFoundError:
            # BUG FIX: the original called self.logger.Error (no such method
            # on logging.Logger) and then returned an unbound variable;
            # log properly and return an empty list instead.
            self.logger.error('Please select a valid filename or directory')
            return []
def main():
    """Entry point: parse CLI arguments and dispatch the selected action
    (parse / collect / hunt)."""
    helpers = cyh_helpers()

    # Capture arguments
    args = Arguments(sys.argv)
    pargs = args.get_args()

    # Capturing start time for debugging purposes
    st = datetime.now()
    helpers.logger.info("Starting CYBERNETHUNTER Hunting Framework")

    # CYBERNETHUNTER ACTION: PARSE
    if pargs.action == "parse":
        helpers.logger.info("Starting CYBERNETHUNTER Parsers")

        # A file output pipe needs an explicit output file name. Hoisted out
        # of the per-file loop: the check does not depend on the current file.
        if pargs.output_pipe == 'file' and pargs.output_file is None:
            helpers.logger.error("You must specify a --output-file parameter if you are choosing a file output pipe")
            sys.exit()

        # Obtain a list of all target files
        targetfiles = helpers.list_targetfiles(pargs)

        # Iterating over the results and closing pipe at the end
        for file in targetfiles:
            # Start an output pipe
            helpers.init_output_pipe(
                output_pipe=pargs.output_pipe,
                output_type=pargs.output_type,
                output_file=pargs.output_file,
                log_type=pargs.log_type,
                kafka_broker=pargs.kafka_broker,
                rabbitmq_broker=pargs.rabbitmq_broker,
                rabbitmq_credentials=pargs.rabbitmq_credentials
            )

            # Load the required parsermod.
            # CONSISTENCY FIX: the package anchor now matches the package the
            # top-of-file imports use ("cybernethunter.parsermods"); the
            # original anchored the relative import at bare "parsermods".
            load_parser_mod = importlib.import_module("." + pargs.module, "cybernethunter.parsermods")
            parsermod = load_parser_mod.ParserMod(file)

            # Execute parsermod and send records to the output pipe
            record_generator = parsermod.execute()
            helpers.send_to_output_pipe(record_generator, use_streamz=False)

    # CYBERNETHUNTER ACTION: COLLECT
    # (elif: the actions are a mutually-exclusive argparse choice)
    elif pargs.action == "collect":
        helpers.logger.info("Initiating CYBERNETHUNTER DFIR Collector")
        helpers.logger.info("Starting CYBERNETHUNTER MultiParser")

        # Obtain a list of all target files
        targetfiles = helpers.list_targetfiles(pargs)

        # Iterating over the results and closing pipe at the end
        for file in targetfiles:
            parsermod = importlib.import_module("." + pargs.module, "cybernethunter.parsermods")
            # BUG FIX: the original referenced pargs.logtype and pargs.output,
            # neither of which argparse defines (the attributes are log_type
            # and output_type/output_pipe) — it raised AttributeError.
            # NOTE(review): assuming the third ParserMod argument is the
            # output type — confirm against the parsermods signatures.
            parsermod = parsermod.ParserMod(pargs.log_type, file, pargs.output_type, collect=True)
            parsermod.execute()
            parsermod.runpipe(parsermod.results)

    # CYBERNETHUNTER ACTION: HUNT
    elif pargs.action == "hunt":
        # TBD: idea is to load the hunt-template and pass execution of the template
        # to the "jaguarhunter" (imports PySpark) module inside huntmods. This module(s) will load the template
        # and produce an ElasticSearch Index as output
        print("TBD")

    # Capturing end time for debugging purposes
    et = datetime.now()
    hours, remainder = divmod((et - st).seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    helpers.logger.info("Finished Parsing")
    helpers.logger.info('Took: \x1b[47m \x1b[32m{} hours / {} minutes / {} seconds \x1b[0m \x1b[39m'.format(hours, minutes, seconds))
if __name__ == '__main__':
    # Run the CLI; a Ctrl+C from the user exits with a farewell message
    # instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        farewell = "My awesome awesomeness has been interrupted by the gods. Returning to the depths of the earth"
        print("\n" + farewell + "\n\n")