Skip to content

Commit

Permalink
Merge pull request #1 from serenalotreck/brat
Browse files Browse the repository at this point in the history
Bugfixes for dropped entity repercussions
  • Loading branch information
serenalotreck authored Dec 7, 2022
2 parents 1345d73 + 2c7d36e commit 9497124
Show file tree
Hide file tree
Showing 3 changed files with 296 additions and 35 deletions.
147 changes: 138 additions & 9 deletions dygie/tests/data/annotated_doc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,33 +110,75 @@ def setUp(self):
nlp,
dataset,
coref=True)
self.annotated_doc.char_to_token()

# Set up relation
self.rel1 = ad.BinRel("R1\tMayor-Of Arg1:T2 Arg2:T3".split())

# Right answer
self.relations = [[], [[6, 7, 9, 11, "Mayor-Of"]], []]

# Missing entity annotations
missing_ann = ("T1\tCity 0 7\tSeattle\n"
"T2\tPerson 22 37\tJenny Durkan\n"
"T3\tCity 41 51\tthe city's\n"
"T4\tPerson 59 62\tShe\n"
"T5\tPersonnel.Election 67 74\telected\n"
"T6\tYear 78 82\t2017\n"
"R1\tMayor-Of Arg1:T2 Arg2:T3\n"
"E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
"*\tEQUIV T1 T3\n"
"*\tEQUIV T2 T4\n")
missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
with open(missing_ann_path, 'w') as f:
f.write(missing_ann)

# Set up annotated_doc object
self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
text_path, missing_ann_path, nlp, dataset, coref=True)

# Right answer
self.missing_relations = [[], [], []]

def tearDown(self):
    """Recursively delete the directory at ``self.tmpdir``.

    setUp writes the annotation files for this test case into that
    directory, so removing it cleans up after every test.
    """
    scratch_dir = self.tmpdir
    shutil.rmtree(scratch_dir)

# set_arg_objects is always called *before* char_to_token.
# The two methods will fail if run in the opposite order on a document whose
# entities get dropped, but as long as they are only used via
# brat_to_input.py, the order is baked in and therefore safe.
def test_set_arg_objects(self):
    """Check that rel1's arguments resolve to entity objects of the doc."""
    ents = self.annotated_doc.ents
    self.rel1.set_arg_objects(ents)

    # rel1 is built from "Mayor-Of Arg1:T2 Arg2:T3"; indices 1 and 2 are
    # presumably T2 and T3 (assumes ents keeps annotation order -- confirm).
    expected_arg1, expected_arg2 = ents[1], ents[2]
    self.assertEqual(self.rel1.arg1, expected_arg1)
    self.assertEqual(self.rel1.arg2, expected_arg2)

def test_set_arg_objects_missing_arg(self):
    """Check argument resolution against the missing-entity document.

    char_to_token() has not been called at this point, so both arguments
    are still expected to be found in the entity list.
    """
    ents = self.missing_annotated_doc.ents
    self.rel1.set_arg_objects(ents)

    expected_arg1, expected_arg2 = ents[1], ents[2]
    self.assertEqual(self.rel1.arg1, expected_arg1)
    self.assertEqual(self.rel1.arg2, expected_arg2)

def test_format_bin_rels_dygiepp(self):
    """Format rel1 for DyGIE++ and compare it with ``self.relations``.

    The call order is deliberate: argument objects are attached first,
    then char_to_token() is run on the document, and only afterwards is
    the relation formatted.
    """
    self.rel1.set_arg_objects(self.annotated_doc.ents)
    self.annotated_doc.char_to_token()
    # format_bin_rels_dygiepp returns a (relations, dropped) pair; only the
    # formatted relations are checked in this test.
    relations, _dropped_rels = ad.BinRel.format_bin_rels_dygiepp(
        [self.rel1], self.sent_idx_tups)

    self.assertEqual(relations, self.relations)

def test_format_bin_rels_dygiepp_missing_arg(self):
    """Format rel1 against the missing-entity document.

    The expected value, ``self.missing_relations``, is three empty lists,
    i.e. no relation is emitted.
    """
    doc = self.missing_annotated_doc
    self.rel1.set_arg_objects(doc.ents)
    doc.char_to_token()

    bin_rels = [self.rel1]
    relations, _dropped_rels = ad.BinRel.format_bin_rels_dygiepp(
        bin_rels, self.sent_idx_tups)

    self.assertEqual(relations, self.missing_relations)


class TestEvent(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -175,7 +217,6 @@ def setUp(self):
nlp,
dataset,
coref=True)
self.annotated_doc.char_to_token()

# Set up events
self.event1 = ad.Event(
Expand All @@ -186,6 +227,28 @@ def setUp(self):
[[[16, "Personnel.Election"], [14, 14, "Person"],
[18, 18, "Year"]]]]

# Missing entity annotations
missing_ann = ("T1\tCity 0 7\tSeattle\n"
"T2\tPerson 22 37\tJenny Durkan\n"
"T3\tCity 41 51\tthe city's\n"
"T4\tPerson 59 62\tShe\n"
"T5\tPersonnel.Election 63 74\telected\n"
"T6\tYear 78 82\t2017\n"
"R1\tMayor-Of Arg1:T2 Arg2:T3\n"
"E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
"*\tEQUIV T1 T3\n"
"*\tEQUIV T2 T4\n")
missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
with open(missing_ann_path, 'w') as f:
f.write(missing_ann)

# Set up annotated_doc object
self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
text_path, missing_ann_path, nlp, dataset, coref=True)

# Right answer
self.missing_events = [[], [], []]

def tearDown(self):
    """Remove ``self.tmpdir`` and every file inside it after each test.

    setUp writes this test case's annotation files into that directory.
    """
    scratch_dir = self.tmpdir
    shutil.rmtree(scratch_dir)
Expand All @@ -202,11 +265,32 @@ def test_set_arg_objects(self):
def test_format_events_dygiepp(self):
    """Format event1 for DyGIE++ and compare it with ``self.events``.

    The call order is deliberate: argument objects are attached first,
    then char_to_token() is run on the document, and only afterwards is
    the event formatted.
    """
    self.event1.set_arg_objects(self.annotated_doc.ents)
    self.annotated_doc.char_to_token()
    # format_events_dygiepp returns an (events, dropped) pair; only the
    # formatted events are checked in this test.
    events, _dropped_events = ad.Event.format_events_dygiepp(
        [self.event1], self.sent_idx_tups)

    self.assertEqual(events, self.events)

def test_set_arg_objects_missing_ann(self):
    """Check event1's trigger and arguments on the missing-entity doc."""
    ents = self.missing_annotated_doc.ents
    self.event1.set_arg_objects(ents)

    # Indices 4, 3 and 5 presumably correspond to T5, T4 and T6 of the
    # fixture's E1 line (assumes ents keeps annotation order -- confirm).
    expected_trigger = ents[4]
    expected_args = [ents[3], ents[5]]
    self.assertEqual(self.event1.trigger, expected_trigger)
    self.assertEqual(self.event1.args, expected_args)

def test_format_events_dygiepp_missing_ann(self):
    """Format event1 against the missing-entity document.

    The expected value, ``self.missing_events``, is three empty lists,
    i.e. no event is emitted.
    """
    doc = self.missing_annotated_doc
    self.event1.set_arg_objects(doc.ents)
    doc.char_to_token()

    event_list = [self.event1]
    events, _dropped_events = ad.Event.format_events_dygiepp(
        event_list, self.sent_idx_tups)

    self.assertEqual(events, self.missing_events)


class TestEquivRel(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -243,7 +327,6 @@ def setUp(self):
nlp,
dataset,
coref=True)
self.annotated_doc.char_to_token()

# Set up equivalence relations
self.equivrel1 = ad.EquivRel("*\tEQUIV T1 T3".split())
Expand All @@ -252,6 +335,28 @@ def setUp(self):
# The dygiepp-formatted correct answer
self.corefs = [[[0, 0], [9, 11]], [[6, 7], [14, 14]]]

# Missing entity annotations
missing_ann = ("T1\tCity 0 7\tSeattle\n"
"T2\tPerson 22 37\tJenny Durkan\n"
"T3\tCity 41 51\tthe city's\n"
"T4\tPerson 59 62\tShe\n"
"T5\tPersonnel.Election 67 74\telected\n"
"T6\tYear 78 82\t2017\n"
"R1\tMayor-Of Arg1:T2 Arg2:T3\n"
"E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
"*\tEQUIV T1 T3\n"
"*\tEQUIV T2 T4\n")
missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
with open(missing_ann_path, 'w') as f:
f.write(missing_ann)

# Set up annotated_doc object
self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
text_path, missing_ann_path, nlp, dataset, coref=True)

# The dygiepp-formatted correct answer
self.missing_corefs = [[[0, 0], [9, 11]]]

def tearDown(self):
    """Delete the ``self.tmpdir`` directory tree once a test has finished.

    setUp writes this test case's annotation files into that directory.
    """
    scratch_dir = self.tmpdir
    shutil.rmtree(scratch_dir)
Expand All @@ -272,11 +377,36 @@ def test_format_corefs_dygiepp(self):

self.equivrel1.set_arg_objects(self.annotated_doc.ents)
self.equivrel2.set_arg_objects(self.annotated_doc.ents)
corefs = ad.EquivRel.format_corefs_dygiepp(
self.annotated_doc.char_to_token()
corefs, dropped_equiv_rels = ad.EquivRel.format_corefs_dygiepp(
[self.equivrel1, self.equivrel2])

self.assertEqual(corefs, self.corefs)

def test_set_arg_objects_missing_ann(self):
    """Check both EQUIV relations resolve their args on the missing doc."""
    ents = self.missing_annotated_doc.ents
    self.equivrel1.set_arg_objects(ents)
    self.equivrel2.set_arg_objects(ents)

    # equivrel1 is built from "EQUIV T1 T3"; the index pairs below assume
    # ents keeps annotation order (T1..T6) -- confirm against parse_ann.
    expected_first = [ents[0], ents[2]]
    expected_second = [ents[1], ents[3]]
    self.assertEqual(self.equivrel1.args, expected_first)
    self.assertEqual(self.equivrel2.args, expected_second)

def test_format_corefs_dygiepp_missing_ann(self):
    """Format both EQUIV relations against the missing-entity document.

    The expected value, ``self.missing_corefs``, contains a single
    cluster, so only one of the two relations is expected in the output.
    """
    doc = self.missing_annotated_doc
    equiv_rels = [self.equivrel1, self.equivrel2]
    # Attach entity objects in the same order as the relations are listed.
    for equiv_rel in equiv_rels:
        equiv_rel.set_arg_objects(doc.ents)
    doc.char_to_token()

    corefs, _dropped_equiv_rels = ad.EquivRel.format_corefs_dygiepp(
        equiv_rels)

    self.assertEqual(corefs, self.missing_corefs)


class TestAnnotatedDoc(unittest.TestCase):
"""
Expand Down Expand Up @@ -362,7 +492,6 @@ def test_format_dygiepp(self):
coref=True)
annotated_doc.char_to_token()
res = annotated_doc.format_dygiepp()

self.assertEqual(res, self.dygiepp_dict)


Expand Down
Loading

0 comments on commit 9497124

Please sign in to comment.