From b212901c1a15a7c516749eee61de6c58655e7f54 Mon Sep 17 00:00:00 2001
From: AlessioSavi
Date: Tue, 21 May 2019 23:31:56 +0200
Subject: [PATCH] v0.1.1

Enhancements

Issue #2
- Model saving mechanism rewritten from scratch (using a timestamp as name)
- Every model will now be saved in a different directory
- Every piece of data related to the model (dataset + configuration) will be
  saved in the same folder
- Configuration file changed due to the new layout of the model folder
- dump_model (dataset) rewritten and migrated to utils
- dump_model (classifier) rewritten in order to be compliant with the new
  folder architecture
- Moved the parallelism from "different person" to "different image of the
  same person"
- Enabled a progress bar during face analysis
- The Response constructor will now accept parameters

Issue #4
- Created a function that retrieves the dataset from the input HTML form and
  returns it to the tune/train function
- Standardized and refactored the train/tune logic

BugFix
- Dump the real classifier (grid.best_estimator_)
---
 api/Api.py                  | 101 ++++++++--------------
 conf/test.json              |  13 ++-
 datastructure/Classifier.py | 162 ++++++++++++++++++++++--------------
 datastructure/Person.py     |   2 +-
 datastructure/Response.py   |  10 +--
 main.py                     |   2 +-
 utils/util.py               |  74 +++++++++++++---
 7 files changed, 210 insertions(+), 154 deletions(-)
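
Note: after this patch every train/tune run saves its artifacts in a dedicated
timestamped folder under the configured model_path. A minimal sketch (stdlib
only) of loading such a run by hand, assuming the model_path and timestamp
values shipped in conf/test.json below, and that the run exists on disk:

    import json
    import os
    import pickle

    model_path = "dataset/model/"  # CFG["classifier"]["model_path"]
    timestamp = "20190521_131449"  # CFG["classifier"]["timestamp"]
    model_dir = os.path.join(model_path, timestamp)

    # model.json -> hyperparameters + relative path of the classifier
    with open(os.path.join(model_dir, "model.json")) as f:
        config = json.load(f)
    print(config["classifier_file"], config["params"])

    # model.clf -> the pickled KNeighborsClassifier (grid.best_estimator_ after tuning)
    with open(os.path.join(model_dir, "model.clf"), "rb") as f:
        classifier = pickle.load(f)

    # model.dat -> the pickled dataset: a dict {'X': face encodings, 'Y': labels}
    with open(os.path.join(model_dir, "model.dat"), "rb") as f:
        dataset = pickle.load(f)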
diff --git a/api/Api.py b/api/Api.py
index ad782af..4c08b30 100644
--- a/api/Api.py
+++ b/api/Api.py
@@ -2,15 +2,13 @@
 """
 Custom function that will be wrapped for be HTTP compliant
 """
-import os
-import pickle
+
 import time
-import zipfile
+from datetime import datetime
 from logging import getLogger
-from os.path import join as path_join
 
 from datastructure.Response import Response
-from utils.util import print_prediction_on_image, random_string, remove_dir, unzip_data
+from utils.util import print_prediction_on_image, random_string, retrieve_dataset
 
 log = getLogger()
 
@@ -24,12 +22,12 @@ def predict_image(img_path, clf, PREDICTION_PATH):
     :return: Response dictionary jsonizable
     """
     response = Response()
-    log.debug("predict_image | Predicting {}".format(img_path))
     if clf is None:
+        log.error("predict_image | FATAL | Classifier is None!")
         prediction = None
     else:
-        prediction = clf.predict(img_path)
-        log.debug("predict_image | Image analyzed!")
+        log.debug("predict_image | Predicting {}".format(img_path))
+        prediction = clf.predict(img_path, distance_threshold=0.45)
     # Manage success
     if prediction is not None and isinstance(prediction, list) and len(prediction) == 1:
         img_name = random_string() + ".png"
@@ -79,23 +77,23 @@ def train_network(folder_uncompress, zip_file, clf):
     :param clf:
     :return:
     """
-    log.debug("train_network | uncompressing zip file ...")
-    folder_name = path_join(folder_uncompress, random_string())
-    zip_ref = zipfile.ZipFile(zip_file)
-    zip_ref.extractall(folder_name)
-    zip_ref.close()
-    log.debug("train_network | zip file uncompressed!")
-    clf.init_peoples_list(peoples_path=folder_name)
-    dataset = clf.init_dataset()
-    neural_model_file = clf.train(dataset["X"], dataset["Y"])
-    log.debug("train_network | Removing unzipped files")
-    remove_dir(folder_name)
-    response = Response()
-    response.status = "OK"
-    response.data = neural_model_file
-    response.description = "Model succesfully trained!"
-    return response.__dict__
+    log.debug("train_network | Starting training phase ...")
+    dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+    if dataset is None:
+        return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
+    else:
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        neural_model_file, elapsed_time = clf.train(dataset["X"], dataset["Y"], timestamp)
+
+        response = Response(status="OK", data=neural_model_file)
+        response.description = "Model successfully trained! | {}".format(
+            time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
+        log.debug("train_network | Training phase finished! | {}".format(response.description))
+
+        return response.__dict__
 
 
 def tune_network(folder_uncompress, zip_file, clf):
@@ -106,50 +104,19 @@ def tune_network(folder_uncompress, zip_file, clf):
     :param clf:
     :return:
     """
-    log.debug("tune_network | uncompressing zip file ...")
-    check = verify_extension(zip_file.filename)
-    if check == "zip":  # Image provided
-        folder_name = unzip_data(folder_uncompress, zip_file)
-        log.debug("tune_network | zip file uncompressed!")
-        clf.init_peoples_list(peoples_path=folder_name)
-        dataset = clf.init_dataset()
-    elif check == "dat":
-        dataset = pickle.load(zip_file)
+    log.debug("tune_network | Starting tuning phase ...")
+    dataset = retrieve_dataset(folder_uncompress, zip_file, clf)
+
+    if dataset is None:
+        return Response(error="ERROR DURING LOADING DAT", description="Seems that the dataset is not valid").__dict__
+
     else:
-        dataset = None
-
-    if dataset is not None:
-        start_time = time.time()
-        neural_model_file = clf.tuning(dataset["X"], dataset["Y"])
-        elapsed_time = time.time() - start_time
-
-        log.debug("tune_network | Removing unzipped files")
-        if check == "zip":
-            # TODO: Refactor this method :/
-            remove_dir(folder_name)
-        response = Response()
-        response.status = "OK"
-        response.data = neural_model_file
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        neural_model_file, elapsed_time = clf.tuning(dataset["X"], dataset["Y"], timestamp)
+
+        response = Response(status="OK", data=neural_model_file)
         response.description = "Model succesfully trained! | {}".format(
             time.strftime("%H:%M:%S.%f", time.gmtime(elapsed_time)))
-    else:
-        response = Response()
-        response.error = "ERROR DURING LOADING DAT"
-        return response.__dict__
-
+        log.debug("tune_network | Tuning phase finished! | {}".format(response.description))
 
-def verify_extension(file):
-    """
-    Wrapper for validate file
-    :param file:
-    :return:
-    """
-    extension = os.path.splitext(file)[1]
-    log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
-    if extension == ".zip":
-        # In this case we have to analyze the photos
-        return "zip"
-    elif extension == ".dat":
-        # Photos have been alredy analyzed, dataset is ready!
-        return "dat"
-    return None
+        return response.__dict__
diff --git a/conf/test.json b/conf/test.json
index 5fcf24f..de4e3f6 100644
--- a/conf/test.json
+++ b/conf/test.json
@@ -1,6 +1,6 @@
 {
   "PyRecognizer": {
-    "Version": "0.0.1",
+    "Version": "0.1.1",
     "temp_upload_training": "uploads/training/",
     "temp_upload_predict": "uploads/predict/",
     "temp_upload": "uploads/upload"
@@ -23,9 +23,14 @@
   "classifier": {
     "trainin_dir": "dataset/images/",
     "model_path": "dataset/model/",
-    "model": "model-20190518_191827.clf",
-    "n_neighbors": "",
-    "knn_algo": ""
+    "timestamp": "20190521_131449",
+    "params": {
+      "algorithm": "ball_tree",
+      "metric": "minkowski",
+      "n_neighbors": 78,
+      "p": 2,
+      "weights": "distance"
+    }
   },
   "data": {
     "test_data": "/tmp/test_data/"
diff --git a/datastructure/Classifier.py b/datastructure/Classifier.py
index 5841615..fa6077e 100644
--- a/datastructure/Classifier.py
+++ b/datastructure/Classifier.py
@@ -6,9 +6,11 @@
 import logging
 import os
 import pickle
-from datetime import datetime
+import time
 from math import sqrt
-from multiprocessing.pool import ThreadPool
+from tqdm import tqdm
+
+
 from pprint import pformat
 
 import face_recognition
@@ -32,19 +34,22 @@ def __init__(self):
         self.training_dir = None
         self.model_path = None
         self.n_neighbors = None
-        self.knn_algo = None
+        self.algorithm = None
+        self.metric = None
+        self.p = None
+        self.weights = None
         self.peoples_list = []
         self.classifier = None
 
-    def init_knn_algo(self, knn_algo):
+    def init_algorithm(self, algorithm):
         """
-        Initialize the knn_algorithm for the neural network. If not provided the 'ball_tree' will
+        Initialize the algorithm for the neural network. If not provided, the 'ball_tree' will
         be used as default
-        :param knn_algo: 'ball_tree' as default
+        :param algorithm: 'ball_tree' as default
         """
-        log.debug("init_knn_algo | Initializing knn algorithm ...")
+        log.debug("init_algorithm | Initializing knn algorithm ...")
-        if self.knn_algo is None:
-            self.knn_algo = knn_algo
+        if self.algorithm is None:
+            self.algorithm = algorithm
 
     def init_n_neighbors(self, X_len=10):
         """
@@ -63,45 +68,51 @@ def init_classifier(self):
         """
         if self.classifier is None:
             log.debug("init_classifier | START!")
-            if self.knn_algo is not None and self.n_neighbors is not None:
+            if self.algorithm is not None and self.n_neighbors is not None:
                 log.debug("init_classifier | Initializing a new classifier ... | {0}".format(pformat(self.__dict__)))
                 self.classifier = KNeighborsClassifier(
-                    n_neighbors=self.n_neighbors, algorithm=self.knn_algo, weights='distance')
+                    n_neighbors=self.n_neighbors, algorithm=self.algorithm, weights='distance')
             else:
-                log.error("init_classifier | Mandatory parameter not provided :/")
-                self.classifier = None
-
-    def init_specs(self, X_len, knn_algo='ball_tree'):
-        """
-        Initalize the classifier
-        :param knn_algo:
-        :param X_len:
-        """
-        log.debug("init_specs | Init knn algorithm ...")
-        self.init_knn_algo(knn_algo)
-        self.init_n_neighbors(X_len)
-        self.init_classifier()
+                log.error("init_classifier | Mandatory parameter not provided | Init a new KNN Classifier")
+                self.classifier = KNeighborsClassifier()
 
-    def load_classifier_from_file(self, classifier_file):
+    def load_classifier_from_file(self, timestamp):
         """
-        Initalize the classifier from file
-        :param classifier_file:
+        Initialize the classifier from file.
+        The classifier file represents the name of the directory related to the classifier that we want to load.
+
+        The tree structure of the model folder will be something like this:
+
+        model/
+        ├── <20190520_095119>/  --> Timestamp in which the model was created
+        │   ├── model.dat       --> Dataset generated by encoding the faces and pickling them
+        │   ├── model.clf       --> Classifier delegated to recognize a given face
+        │   ├── model.json      --> Hyperparameters related to the current classifier
+        ├── <20190519_210950>/
+        │   ├── model.dat
+        │   ├── model.clf
+        │   ├── model.json
+        └── ...
+
+        :param timestamp:
         :return:
         """
-        log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(classifier_file))
+        log.debug("load_classifier_from_file | Loading classifier from file ... | File: {}".format(timestamp))
         # Load a trained KNN model (if one was passed in)
         err = None
         if self.classifier is None:
             if self.model_path is None or not os.path.isdir(self.model_path):
                 raise Exception("Model folder not provided!")
-            log.debug("load_classifier_from_file | Loading classifier from file ...")
-            log.debug("load_classifier_from_file | Path {} exist ...".format(self.model_path))
-            filename = os.path.join(self.model_path, classifier_file)
+            # Adding the conventional name used for the classifier -> 'model.clf'
+            filename = os.path.join(self.model_path, timestamp, "model.clf")
+            log.debug("load_classifier_from_file | Loading classifier from file: {}".format(filename))
             if os.path.isfile(filename):
                 log.debug("load_classifier_from_file | File {} exist ...".format(filename))
                 with open(filename, 'rb') as f:
                     self.classifier = pickle.load(f)
+                log.debug("load_classifier_from_file | Classifier loaded!")
             else:
                 err = "load_classifier_from_file | FATAL | File {} DOES NOT EXIST ...".format(filename)
         else:
@@ -113,32 +124,44 @@ def load_classifier_from_file(self, classifier_file):
             self.classifier = None
         return
 
-    def train(self, X, Y):
+    def train(self, X, Y, timestamp):
         """
         Train a new model by the given data [X] related to the given target [Y]
         :param X:
         :param Y:
+        :param timestamp:
         """
         log.debug("train | START")
-        if self.classifier is not None:
-            log.debug("train | Training ...")
-            X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
-            self.classifier.fit(X_train, Y_train)
-            log.debug("train | Model Trained!")
-            log.debug("train | Checking performance ...")
-            y_pred = self.classifier.predict(x_test)
-            # Static method
-            self.verify_performance(y_test, y_pred)
-            return self.dump_model(self.model_path, "model")
-
-    def tuning(self, X, Y):
+        if self.classifier is None:
+            self.init_classifier()
+
+        dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
+        start_time = time.time()
+
+        X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
+        log.debug("train | Training ...")
+        self.classifier.fit(X_train, Y_train)
+        log.debug("train | Model Trained!")
+        log.debug("train | Checking performance ...")
+        y_pred = self.classifier.predict(x_test)
+        # Static method
+        self.verify_performance(y_test, y_pred)
+
+        return self.dump_model(timestamp=timestamp, classifier=self.classifier), time.time() - start_time
+
+    def tuning(self, X, Y, timestamp):
         """
         Tune the hyperparameter of a new model by the given data [X] related to the given target [Y]
         :param X:
         :param Y:
+        :param timestamp:
         :return:
         """
+        start_time = time.time()
+        dump_dataset(X, Y, os.path.join(self.model_path, timestamp))
+
         X_train, x_test, Y_train, y_test = train_test_split(X, Y, test_size=0.25)
         self.classifier = KNeighborsClassifier()
         # Hyperparameter of the neural network (KKN)
@@ -159,7 +182,7 @@ def tuning(self, X, Y):
             'p': power_range,
         }
         log.debug("tuning | Parameter -> {}".format(pformat(parameter_space)))
-        grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=10, n_jobs=3)
+        grid = GridSearchCV(self.classifier, parameter_space, cv=3, scoring='accuracy', verbose=10, n_jobs=1)
         grid.fit(X_train, Y_train)
         log.info("TUNING COMPLETE | DUMPING DATA!")
         # log.info("tuning | Grid Scores: {}".format(pformat(grid.grid_scores_)))
@@ -171,7 +194,8 @@ def tuning(self, X, Y):
 
         self.verify_performance(y_test, y_pred)
 
-        return self.dump_model(params=grid.best_params_)
+        return self.dump_model(timestamp=timestamp, params=grid.best_params_,
+                               classifier=grid.best_estimator_), time.time() - start_time
 
     @staticmethod
     def verify_performance(y_test, y_pred):
@@ -183,42 +207,47 @@ def verify_performance(y_test, y_pred):
         """
         log.debug("verify_performance | Analyzing performance ...")
-        # log.info("Computing classifier score --> {}".format(pformat(clf.score(y_test,y_pred))))
         log.info("Classification Report: {}".format(pformat(classification_report(y_test, y_pred))))
         log.info("balanced_accuracy_score: {}".format(pformat(balanced_accuracy_score(y_test, y_pred))))
         log.info("accuracy_score: {}".format(pformat(accuracy_score(y_test, y_pred))))
         log.info("precision_score: {}".format(pformat(precision_score(y_test, y_pred, average='weighted'))))
 
-    def dump_model(self, params, path=None, file=None):
+    def dump_model(self, timestamp, classifier, params=None, path=None):
         """
         Dump the model to the given path, file
         :param params:
+        :param timestamp:
+        :param classifier:
         :param path:
-        :param file:
+
         """
+        log.debug("dump_model | Dumping model ...")
         if path is None:
             if self.model_path is not None:
                 if os.path.exists(self.model_path) and os.path.isdir(self.model_path):
                     path = self.model_path
-        if file is None:
-            file = "model"
-
-        if os.path.isdir(path):
-            time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
-            classifier_file = os.path.join(path, "{}-{}".format(file, time_parsed))
-            config = {'classifier_file': classifier_file,
-                      'params': params
-                      }
+        config = {'classifier_file': os.path.join(timestamp, "model.clf"),
+                  'params': params
+                  }
+        if not os.path.isdir(path):
+            os.makedirs(path)
+        classifier_folder = os.path.join(path, timestamp)
+        classifier_file = os.path.join(classifier_folder, "model")
 
         log.debug("dump_model | Dumping model ... | Path: {} | File: {}".format(path, classifier_file))
         # TODO: Save every model in a different folder
+        if not os.path.exists(classifier_folder):
+            os.makedirs(classifier_folder)
+
         with open(classifier_file + ".clf", 'wb') as f:
-            pickle.dump(self.classifier, f)
+            pickle.dump(classifier, f)
+
         with open(classifier_file + ".json", 'w') as f:
             json.dump(config, f)
         log.info('dump_model | Configuration saved to {0}'.format(classifier_file))
 
-            return config
+        return config
 
     def init_peoples_list(self, peoples_path=None):
         """
@@ -230,11 +259,17 @@ def init_peoples_list(self, peoples_path=None):
         log.debug("init_peoples_list | Initalizing people ...")
         if peoples_path is not None and os.path.isdir(peoples_path):
             self.training_dir = peoples_path
-            # freq_list = pool.map(partial(get_frequency, nlp=nlp_en, client=mongo_client), fileList)
-            pool = ThreadPool(3)
-            self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+            # pool = ThreadPool(3)
+            # self.peoples_list = pool.map(self.init_peoples_list_core, os.listdir(self.training_dir))
+
+            for people_name in tqdm(os.listdir(self.training_dir),
+                                    total=len(os.listdir(self.training_dir)), desc="Init people list ..."):
+                self.peoples_list.append(self.init_peoples_list_core(people_name))
+
             self.peoples_list = list(filter(None.__ne__, self.peoples_list))  # Remove None
 
+    # TODO: Add a method to dump the datastructure, so the same data does not have to be reloaded for every test
+
     def init_peoples_list_core(self, people_name):
 
@@ -272,7 +307,6 @@ def init_dataset(self):
                 DATASET["X"].append(item)
             for item in people.dataset["Y"]:
                 DATASET["Y"].append(item)
-        dump_dataset(DATASET, self.model_path)
         return DATASET
 
     # TODO: Add configuration parameter for choose the distance_threshold
diff --git a/datastructure/Person.py b/datastructure/Person.py
index 0df3ae1..2d8a9d8 100644
--- a/datastructure/Person.py
+++ b/datastructure/Person.py
@@ -42,7 +42,7 @@ def init_dataset(self):
         if self.path != "" and isdir(self.path):
             log.debug("initDataset | Paramater provided, iterating images ..")
             # Iterating the images in parallel
-            pool = ThreadPool(1)
+            pool = ThreadPool(2)
             self.dataset["X"] = pool.map(self.init_dataset_core, image_files_in_folder(self.path))
             self.dataset["X"] = list(filter(None.__ne__, self.dataset["X"]))  # Remove None
             # Loading the Y [target]
diff --git a/datastructure/Response.py b/datastructure/Response.py
index 82cbaa8..c028edf 100644
--- a/datastructure/Response.py
+++ b/datastructure/Response.py
@@ -12,9 +12,9 @@ class Response(object):
     external tools
     """
 
-    def __init__(self):
-        self.status = "KO"
-        self.description = None
-        self.error = None
-        self.data = None
+    def __init__(self, status="KO", description=None, error=None, data=None):
+        self.status = status
+        self.description = description
+        self.error = error
+        self.data = data
         self.date = str(datetime.now())
diff --git a/main.py b/main.py
index 79617a2..e0202bb 100644
--- a/main.py
+++ b/main.py
@@ -39,7 +39,7 @@
 log.debug("Init classifier ...")
 clf = Classifier()
 clf.model_path = CFG["classifier"]["model_path"]
-clf.load_classifier_from_file(CFG["classifier"]["model"])
+clf.load_classifier_from_file(CFG["classifier"]["timestamp"])
 
 # TODO Add check on extension
 allowed_ext = ["jpg", "jpeg", "png"]
diff --git a/utils/util.py b/utils/util.py
index 1db6639..a34cf36 100644
--- a/utils/util.py
+++ b/utils/util.py
@@ -10,7 +10,6 @@
 import shutil
 import string
 import zipfile
-from datetime import datetime
 from logging.handlers import TimedRotatingFileHandler
 
 from PIL import Image, ImageDraw
 
@@ -132,7 +131,7 @@ def unzip_data(unzipped_folder, zip_file):
     Unzip the zip file in input in the given 'unzipped_folder'
     :param unzipped_folder:
     :param zip_file:
-    :return:
+    :return: The name of the folder in which to find the unzipped data
     """
     log = logging.getLogger()
     folder_name = os.path.join(unzipped_folder, random_string())
@@ -144,23 +143,28 @@ def unzip_data(unzipped_folder, zip_file):
     return folder_name
 
 
-def dump_dataset(dataset, path, dataset_name=None):
+def dump_dataset(X, Y, path):
     """
 
-    :param dataset:
+    :param X:
+    :param Y:
     :param path:
-    :param dataset_name:
     :return:
     """
     log = logging.getLogger()
-    log.debug("dump_dataset | Dumping {} {}".format(path, dataset_name))
-    if os.path.exists(path) and os.path.isdir(path):
-        if dataset_name is None:
-            dataset_name = "image_dataset"
-        time_parsed = datetime.now().strftime('%Y%m%d_%H%M%S')
-        dataset_name = os.path.join(path, "{}-{}".format(dataset_name, time_parsed))
-        with open(dataset_name + ".dat", 'wb') as f:
+    dataset = {
+        'X': X,
+        'Y': Y
+    }
+    log.debug("dump_dataset | Dumping dataset into {}".format(path))
+    if not os.path.exists(path):
+        os.makedirs(path)
+        log.debug("dump_dataset | Path {} created".format(path))
+        dataset_name = os.path.join(path, "model.dat")
+        with open(dataset_name, 'wb') as f:
             pickle.dump(dataset, f)
+    else:
+        log.error("dump_dataset | Path {} ALREADY EXISTS".format(path))
 
 
 def remove_dir(directory):
     """
@@ -173,3 +177,49 @@ def remove_dir(directory):
     log.debug("remove_dir | Removing directory {}".format(directory))
     if os.path.isdir(directory):
         shutil.rmtree(directory)
+
+
+def verify_extension(file):
+    """
+    Wrapper to validate the file
+    :param file:
+    :return:
+    """
+    log = logging.getLogger()
+    extension = os.path.splitext(file)[1]
+    log.debug("verify_extension | File: {} | Ext: {}".format(file, extension))
+    if extension == ".zip":
+        # In this case we have to analyze the photos
+        return "zip"
+    elif extension == ".dat":
+        # Photos have already been analyzed, the dataset is ready!
+        return "dat"
+    return None
+
+
+def retrieve_dataset(folder_uncompress, zip_file, clf):
+    """
+
+    :param folder_uncompress:
+    :param zip_file:
+    :param clf:
+    :return:
+    """
+    log = logging.getLogger()
+    log.debug("retrieve_dataset | Parsing dataset ...")
+    check = verify_extension(zip_file.filename)
+    if check == "zip":  # Image provided
+        log.debug("retrieve_dataset | Zip file uploaded")
+        folder_name = unzip_data(folder_uncompress, zip_file)
+        log.debug("retrieve_dataset | Zip file uncompressed!")
+        clf.init_peoples_list(peoples_path=folder_name)
+        dataset = clf.init_dataset()
+        log.debug("retrieve_dataset | Removing [{}]".format(folder_name))
+        remove_dir(folder_name)
+    elif check == "dat":
+        log.debug("retrieve_dataset | Pickle data uploaded")
+        dataset = pickle.load(zip_file)
+    else:
+        dataset = None
+    log.debug("retrieve_dataset | Dataset parsed!")
+    return dataset
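
The Response constructor change above is backward compatible: every field
keeps its old default, so existing Response() call sites keep working. A
minimal sketch of the two call styles (the class body is copied from the
datastructure/Response.py hunk above, so the snippet is self-contained):

    from datetime import datetime


    class Response(object):
        """Response produced by the HTTP API, jsonizable via __dict__"""

        def __init__(self, status="KO", description=None, error=None, data=None):
            self.status = status
            self.description = description
            self.error = error
            self.data = data
            self.date = str(datetime.now())


    # Old style: build the default "KO" response, then mutate the fields
    ko = Response()
    ko.error = "ERROR DURING LOADING DAT"

    # New style: build the whole response in a single call
    ok = Response(status="OK", data="dataset/model/20190521_131449/model.clf")
    print(ok.__dict__)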