# main_dataset_manipulation.py
import logging
import os.path
import time
from dataclasses import dataclass, field
from pprint import pprint
from transformers import HfArgumentParser
from svc_demographic_bias_assessment import DatasetManipulator, SVCDataset
# Module-level logger named after this module (``__main__`` when run as a script).
logger = logging.getLogger(__name__)
@dataclass
class ScriptArguments:
    """
    Arguments needed to run this script.

    Raises:
        FileNotFoundError: if ``dataset_directory`` does not exist when the
            instance is created.
    """

    # Path to the SVC dataset; required (no default) and checked for existence
    # in ``__post_init__``.
    dataset_directory: str = field(
        metadata={"help": "Directory pointing towards the SVC Dataset."},
    )

    def __post_init__(self) -> None:
        """Check that ``dataset_directory`` exists.

        An explicit exception is raised rather than using ``assert`` because
        assert statements are removed when Python runs with ``-O``, which
        would silently disable this validation.
        """
        if not os.path.exists(self.dataset_directory):
            raise FileNotFoundError(
                f"Dataset directory does not exist: {self.dataset_directory!r}"
            )
if __name__ == "__main__":
    # Script entry point: build a DatasetManipulator for the dataset given on
    # the command line and print the names of its public attributes.
    logging.basicConfig(level=logging.INFO)

    # Only one dataclass is registered, so the first parsed element is the
    # ScriptArguments instance.
    script_args = HfArgumentParser([ScriptArguments]).parse_args_into_dataclasses()[0]

    manipulator = DatasetManipulator(
        dataset=SVCDataset(dataset_directory=script_args.dataset_directory)
    )

    # Skip every name with a leading underscore (this covers dunder names
    # too) and the "dataset" name itself.
    public_names = []
    for name in dir(manipulator):
        if name.startswith("_") or name == "dataset":
            continue
        public_names.append(name)

    logger.info("All available methods are:")
    # NOTE(review): the short pause presumably lets the log line appear before
    # the pprint output, since the two may go to different streams — confirm.
    time.sleep(0.5)
    pprint(public_names)