-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathworkers.py
38 lines (29 loc) · 1.14 KB
/
workers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from PyPDF2 import PdfFileReader
from question_generation_main import QuestionGeneration
def pdf2text(file_path: str, file_exten: str) -> str:
""" Converts a given file to text content """
_content = ''
# Identify file type and get its contents
if file_exten == 'pdf':
with open(file_path, 'rb') as pdf_file:
_pdf_reader = PdfFileReader(pdf_file)
for p in range(_pdf_reader.numPages):
_content += _pdf_reader.getPage(p).extractText()
# _content = _pdf_reader.getPage(0).extractText()
print('PDF operation done!')
elif file_exten == 'txt':
with open(file_path, 'r') as txt_file:
_content = txt_file.read()
print('TXT operation done!')
return _content
def txt2questions(doc: str, n=5, o=4) -> dict:
""" Get all questions and options """
qGen = QuestionGeneration(n, o)
q = qGen.generate_questions_dict(doc)
for i in range(len(q)):
temp = []
for j in range(len(q[i + 1]['options'])):
temp.append(q[i + 1]['options'][j + 1])
# print(temp)
q[i + 1]['options'] = temp
return q