-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathner.py
executable file
·79 lines (55 loc) · 2.25 KB
/
ner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""This module contains the CAMeL Tools Named Entity Recognition component.
"""
from helpers.helper import prepare_output
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
# from transformers import BertForTokenClassification, BertTokenizer
from helpers.helper import en_to_ar_camel
from camel_tools.ner import NERecognizer
import os
import subprocess
def _ensure_dir(path, label):
    """Create directory *path* if it is missing and announce the creation.

    Uses ``os.makedirs`` instead of shelling out to ``mkdir`` so the call is
    portable and does not spawn a shell. ``exist_ok=True`` tolerates the
    directory appearing between the existence check and the creation.

    Args:
        path: Directory to create, relative to the current working directory.
        label: Short name inserted into the banner printed after creation.
    """
    if os.path.exists(path):
        return
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as err:
        # Stay best-effort like the former shell call: importing this module
        # must not fail just because the directory could not be created.
        print(f'Could not create {path}: {err}')
        return
    print(f'============= MAKING {label} DIR ===============')


# Make sure the local model directory layout exists at import time.
_ensure_dir('model/', 'Model')
_ensure_dir('model/camel', 'Camel')
_ensure_dir('model/ours', 'Ours')
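# For Gcloud (Google Cloud) deployments -- currently disabled: the string
# literal below holds the old setup code that pointed CAMELTOOLS_DATA at
# model/camel and copied model/camel/data into /root/.camel_tools/.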
'''
model_path = os.path.dirname(os.path.abspath(__file__))+'/model/camel'
os.environ["CAMELTOOLS_DATA"] = model_path
copy_path = model_path+'/data'
if not os.path.exists('/root/.camel_tools/'):
subprocess.call( 'mkdir /root/.camel_tools/', shell=True)
print('============= MAKING DIR ===============')
if not os.path.exists('/root/.camel_tools/data/'):
# subprocess.call( 'sudo cp -r '+ copy_path + '/ ' + '/root/.camel_tools/' , shell=True)
## This is the working one
subprocess.call( 'cp -r '+ copy_path + '/ ' + '/root/.camel_tools/' , shell=True)
print(subprocess.call('echo $CAMELTOOLS_DATA' , shell=True))
print(model_path)
print(copy_path)
print( os.listdir(model_path))
print( os.listdir('/root/.camel_tools/'))
print( os.listdir('/root/'))
'''
# Load the pretrained CAMeL Tools NER model once, at import time, so that
# test_camel() reuses the same recognizer instance on every call.
ner = NERecognizer.pretrained()
def test_camel(s):
    """Run the CAMeL NER model on one sentence and summarise tagged tokens.

    The sentence is tokenized by splitting on whitespace only, so punctuation
    should already be separated from words in ``s``.

    Args:
        s: The input sentence as a single string.

    Returns:
        A 3-tuple ``(res, labels, sentence)``:

        * ``res`` -- a string with one line per token whose label is not
          ``'O'``, formatted as ``"<en_to_ar_camel[label]> <label> <token>"``
          and terminated by a newline; empty when no token is tagged.
        * ``labels`` -- the value returned by ``ner.predict_sentence`` for
          the tokens.
        * ``sentence`` -- the list of whitespace-separated tokens.

    Raises:
        KeyError: If a non-``'O'`` label has no entry in ``en_to_ar_camel``
            (assuming it is a plain mapping -- confirm in helpers.helper).
    """
    sentence = s.split()
    labels = ner.predict_sentence(sentence)

    # 'O' is presumably the "outside any entity" tag; such tokens are skipped.
    # Lines are collected and joined once rather than concatenated in a loop,
    # and the parameter ``s`` is no longer reassigned inside the loop.
    tagged_lines = [
        f"{en_to_ar_camel[label]} {label} {token}\n"
        for token, label in zip(sentence, labels)
        if label != 'O'
    ]
    return ''.join(tagged_lines), labels, sentence