diff --git a/dygie/tests/data/annotated_doc_test.py b/dygie/tests/data/annotated_doc_test.py
index 184ea98..4a1db16 100644
--- a/dygie/tests/data/annotated_doc_test.py
+++ b/dygie/tests/data/annotated_doc_test.py
@@ -110,7 +110,6 @@ def setUp(self):
                                                        nlp,
                                                        dataset,
                                                        coref=True)
-        self.annotated_doc.char_to_token()
 
         # Set up relation
         self.rel1 = ad.BinRel("R1\tMayor-Of Arg1:T2 Arg2:T3".split())
@@ -118,10 +117,36 @@ def setUp(self):
 
         # Right answer
         self.relations = [[], [[6, 7, 9, 11, "Mayor-Of"]], []]
 
+        # Missing entity annotations
+        missing_ann = ("T1\tCity 0 7\tSeattle\n"
+                       "T2\tPerson 22 37\tJenny Durkan\n"
+                       "T3\tCity 41 51\tthe city's\n"
+                       "T4\tPerson 59 62\tShe\n"
+                       "T5\tPersonnel.Election 67 74\telected\n"
+                       "T6\tYear 78 82\t2017\n"
+                       "R1\tMayor-Of Arg1:T2 Arg2:T3\n"
+                       "E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
+                       "*\tEQUIV T1 T3\n"
+                       "*\tEQUIV T2 T4\n")
+        missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
+        with open(missing_ann_path, 'w') as f:
+            f.write(missing_ann)
+
+        # Set up annotated_doc object
+        self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
+            text_path, missing_ann_path, nlp, dataset, coref=True)
+
+        # Right answer
+        self.missing_relations = [[], [], []]
+
     def tearDown(self):
 
         shutil.rmtree(self.tmpdir)
 
+    # set_arg_objects is always called *before* char_to_token
+    # They will fail if run in the opposite order with entities that get
+    # dropped, but if they are only used with brat_to_input.py, the order is
+    # baked in and therefore safe
     def test_set_arg_objects(self):
 
         self.rel1.set_arg_objects(self.annotated_doc.ents)
@@ -129,14 +154,31 @@ def test_set_arg_objects(self):
 
         self.assertEqual(self.rel1.arg1, self.annotated_doc.ents[1])
         self.assertEqual(self.rel1.arg2, self.annotated_doc.ents[2])
 
+    def test_set_arg_objects_missing_arg(self):
+
+        self.rel1.set_arg_objects(self.missing_annotated_doc.ents)
+
+        self.assertEqual(self.rel1.arg1, self.missing_annotated_doc.ents[1])
+        self.assertEqual(self.rel1.arg2, self.missing_annotated_doc.ents[2])
+
     def test_format_bin_rels_dygiepp(self):
 
         self.rel1.set_arg_objects(self.annotated_doc.ents)
-        relations, dropped_rels = ad.BinRel.format_bin_rels_dygiepp([self.rel1],
-                                                                    self.sent_idx_tups)
+        self.annotated_doc.char_to_token()
+        relations, dropped_rels = ad.BinRel.format_bin_rels_dygiepp(
+            [self.rel1], self.sent_idx_tups)
 
         self.assertEqual(relations, self.relations)
 
+    def test_format_bin_rels_dygiepp_missing_arg(self):
+
+        self.rel1.set_arg_objects(self.missing_annotated_doc.ents)
+        self.missing_annotated_doc.char_to_token()
+        relations, dropped_rels = ad.BinRel.format_bin_rels_dygiepp(
+            [self.rel1], self.sent_idx_tups)
+
+        self.assertEqual(relations, self.missing_relations)
+
 
 class TestEvent(unittest.TestCase):
     def setUp(self):
@@ -175,7 +217,6 @@ def setUp(self):
                                                        nlp,
                                                        dataset,
                                                        coref=True)
-        self.annotated_doc.char_to_token()
 
         # Set up events
         self.event1 = ad.Event(
@@ -186,6 +227,28 @@ def setUp(self):
             [[[16, "Personnel.Election"], [14, 14, "Person"],
               [18, 18, "Year"]]]]
 
+        # Missing entity annotations
+        missing_ann = ("T1\tCity 0 7\tSeattle\n"
+                       "T2\tPerson 22 37\tJenny Durkan\n"
+                       "T3\tCity 41 51\tthe city's\n"
+                       "T4\tPerson 59 62\tShe\n"
+                       "T5\tPersonnel.Election 63 74\telected\n"
+                       "T6\tYear 78 82\t2017\n"
+                       "R1\tMayor-Of Arg1:T2 Arg2:T3\n"
+                       "E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
+                       "*\tEQUIV T1 T3\n"
+                       "*\tEQUIV T2 T4\n")
+        missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
+        with open(missing_ann_path, 'w') as f:
+            f.write(missing_ann)
+
+        # Set up annotated_doc object
+        self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
+            text_path, missing_ann_path, nlp, dataset, coref=True)
+
+        # Right answer
+        self.missing_events = [[], [], []]
+
     def tearDown(self):
 
         shutil.rmtree(self.tmpdir)
 
@@ -202,11 +265,32 @@ def test_set_arg_objects(self):
 
     def test_format_events_dygiepp(self):
 
         self.event1.set_arg_objects(self.annotated_doc.ents)
-        events, dropped_events = ad.Event.format_events_dygiepp([self.event1],
-                                                                self.sent_idx_tups)
+        self.annotated_doc.char_to_token()
+        events, dropped_events = ad.Event.format_events_dygiepp(
+            [self.event1], self.sent_idx_tups)
 
         self.assertEqual(events, self.events)
 
+    def test_set_arg_objects_missing_ann(self):
+
+        self.event1.set_arg_objects(self.missing_annotated_doc.ents)
+
+        self.assertEqual(self.event1.trigger,
+                         self.missing_annotated_doc.ents[4])
+        self.assertEqual(self.event1.args, [
+            self.missing_annotated_doc.ents[3],
+            self.missing_annotated_doc.ents[5]
+        ])
+
+    def test_format_events_dygiepp_missing_ann(self):
+
+        self.event1.set_arg_objects(self.missing_annotated_doc.ents)
+        self.missing_annotated_doc.char_to_token()
+        events, dropped_events = ad.Event.format_events_dygiepp(
+            [self.event1], self.sent_idx_tups)
+
+        self.assertEqual(events, self.missing_events)
+
 
 class TestEquivRel(unittest.TestCase):
     def setUp(self):
@@ -243,7 +327,6 @@ def setUp(self):
                                                        nlp,
                                                        dataset,
                                                        coref=True)
-        self.annotated_doc.char_to_token()
 
         # Set up equivalence relations
         self.equivrel1 = ad.EquivRel("*\tEQUIV T1 T3".split())
@@ -252,6 +335,28 @@ def setUp(self):
 
         # The dygiepp-formatted correct answer
         self.corefs = [[[0, 0], [9, 11]], [[6, 7], [14, 14]]]
 
+        # Missing entity annotations
+        missing_ann = ("T1\tCity 0 7\tSeattle\n"
+                       "T2\tPerson 22 37\tJenny Durkan\n"
+                       "T3\tCity 41 51\tthe city's\n"
+                       "T4\tPerson 59 62\tShe\n"
+                       "T5\tPersonnel.Election 67 74\telected\n"
+                       "T6\tYear 78 82\t2017\n"
+                       "R1\tMayor-Of Arg1:T2 Arg2:T3\n"
+                       "E1\tPersonnel.Election:T5 Person:T4 Year:T6\n"
+                       "*\tEQUIV T1 T3\n"
+                       "*\tEQUIV T2 T4\n")
+        missing_ann_path = f'{self.tmpdir}/missing_myfile.ann'
+        with open(missing_ann_path, 'w') as f:
+            f.write(missing_ann)
+
+        # Set up annotated_doc object
+        self.missing_annotated_doc = ad.AnnotatedDoc.parse_ann(
+            text_path, missing_ann_path, nlp, dataset, coref=True)
+
+        # The dygiepp-formatted correct answer
+        self.missing_corefs = [[[0, 0], [9, 11]]]
+
     def tearDown(self):
 
         shutil.rmtree(self.tmpdir)
 
@@ -272,11 +377,36 @@ def test_format_corefs_dygiepp(self):
 
         self.equivrel1.set_arg_objects(self.annotated_doc.ents)
         self.equivrel2.set_arg_objects(self.annotated_doc.ents)
+        self.annotated_doc.char_to_token()
         corefs, dropped_equiv_rels = ad.EquivRel.format_corefs_dygiepp(
             [self.equivrel1, self.equivrel2])
 
         self.assertEqual(corefs, self.corefs)
 
+    def test_set_arg_objects_missing_ann(self):
+
+        self.equivrel1.set_arg_objects(self.missing_annotated_doc.ents)
+        self.equivrel2.set_arg_objects(self.missing_annotated_doc.ents)
+
+        self.assertEqual(self.equivrel1.args, [
+            self.missing_annotated_doc.ents[0],
+            self.missing_annotated_doc.ents[2]
+        ])
+        self.assertEqual(self.equivrel2.args, [
+            self.missing_annotated_doc.ents[1],
+            self.missing_annotated_doc.ents[3]
+        ])
+
+    def test_format_corefs_dygiepp_missing_ann(self):
+
+        self.equivrel1.set_arg_objects(self.missing_annotated_doc.ents)
+        self.equivrel2.set_arg_objects(self.missing_annotated_doc.ents)
+        self.missing_annotated_doc.char_to_token()
+        corefs, dropped_equiv_rels = ad.EquivRel.format_corefs_dygiepp(
+            [self.equivrel1, self.equivrel2])
+
+        self.assertEqual(corefs, self.missing_corefs)
+
 
 class TestAnnotatedDoc(unittest.TestCase):
     """
@@ -362,7 +492,6 @@ def test_format_dygiepp(self):
                                               coref=True)
         annotated_doc.char_to_token()
         res = annotated_doc.format_dygiepp()
-
         self.assertEqual(res, self.dygiepp_dict)
 
diff --git a/scripts/new-dataset/annotated_doc.py b/scripts/new-dataset/annotated_doc.py
index fcaac30..f3b12c9 100644
--- a/scripts/new-dataset/annotated_doc.py
+++ b/scripts/new-dataset/annotated_doc.py
@@ -80,7 +80,7 @@ def parse_ann(cls, txt, ann, nlp, dataset, coref):
                 if ';' in line[:second_tab]:
                     idx = line[:line.index("\t")]
                     warnings.warn(f'Entity "{line[second_tab:]}" (ID: '
-                            f'{idx}) is disjoint, and will be dropped.')
+                                  f'{idx}) is disjoint, and will be dropped.')
                 else:
                     lines_continuous.append(line)
             else:
@@ -128,7 +128,6 @@ def parse_ann(cls, txt, ann, nlp, dataset, coref):
 
         return annotated_doc
 
-
     def set_annotation_objects(self):
         """
         For each type of annotation, replace the string IDs with the
@@ -138,7 +137,6 @@ def set_annotation_objects(self):
         [event.set_arg_objects(self.ents) for event in self.events]
         [equiv_rel.set_arg_objects(self.ents) for equiv_rel in self.equiv_rels]
 
-
     def format_dygiepp(self):
         """
         Creates a dygiepp-formatted json for the doc, using each class'
@@ -157,23 +155,27 @@ def format_dygiepp(self):
 
         # Format data
         ner = Ent.format_ner_dygiepp(self.ents, sent_idx_tups)
-        bin_rels, self.dropped_rels = BinRel.format_bin_rels_dygiepp(self.bin_rels,
-                                                      sent_idx_tups)
-        print(f'Completed relation formatting for {self.doc_key}. {self.dropped_rels} of '
-            f'{self.total_original_rels} relations were dropped due to tokenization mismatches.')
+        bin_rels, self.dropped_rels = BinRel.format_bin_rels_dygiepp(
+            self.bin_rels, sent_idx_tups)
+        print(
+            f'Completed relation formatting for {self.doc_key}. {self.dropped_rels} of '
+            f'{self.total_original_rels} relations were dropped due to tokenization mismatches.'
+        )
         if len(self.equiv_rels
                ) > 0 and self.coref:  # Some datasets don't have coreferences
-            corefs, self.dropped_equiv_rels = EquivRel.format_corefs_dygiepp(self.equiv_rels)
+            corefs, self.dropped_equiv_rels = EquivRel.format_corefs_dygiepp(
+                self.equiv_rels)
             print(f'Completed coreference formatting for {self.doc_key}. '
-                    f'{self.dropped_equiv_rels} of '
-                    f'{self.total_original_equiv_rels} were dropped due to '
-                    'tokenization mismatches.')
+                  f'{self.dropped_equiv_rels} of '
+                  f'{self.total_original_equiv_rels} were dropped due to '
+                  'tokenization mismatches.')
         if len(self.events) > 0:  # Some datasets don't have events
-            events = Event.format_events_dygiepp(self.events, sent_idx_tups)
+            events, self.dropped_events = Event.format_events_dygiepp(
+                self.events, sent_idx_tups)
             print(f'Completed event formatting for {self.doc_key}. '
-                    f'{self.dropped_events} of '
-                    f'{self.total_original_events} were dropped due to '
-                    'tokenization mismatches.')
+                  f'{self.dropped_events} of '
+                  f'{self.total_original_events} were dropped due to '
+                  'tokenization mismatches.')
 
         # Make dict
         res = {
@@ -192,7 +194,6 @@ def format_dygiepp(self):
 
         return res
 
-
     def char_to_token(self):
         """
         Does the heavy lifting for converting brat format to dygiepp format.
@@ -218,8 +219,9 @@ def char_to_token(self):
 
                 # If the entity can't be found because there isn't an exact
                 # match in the list, warn that it will be dropped
-                warnings.warn(f'The entity {ent.text} (ID: {ent.ID}) cannot '
-                        'be aligned to the tokenization, and will be dropped.')
+                warnings.warn(
+                    f'The entity {ent.text} (ID: {ent.ID}) cannot '
+                    'be aligned to the tokenization, and will be dropped.')
                 self.dropped_ents += 1
 
             else:
@@ -238,11 +240,13 @@ def char_to_token(self):
                 # Double-check that the tokens from the annotation file match up
                 # with the tokens in the source text.
                 ent_tok_text = [tok.text.lower() for tok in processed_ent]
-                doc_tok_text = [tok.text.lower() for i, tok in enumerate(tok_text)
-                        if i >= ent_tok_start and i <= ent_tok_end]
+                doc_tok_text = [
+                    tok.text.lower() for i, tok in enumerate(tok_text)
+                    if i >= ent_tok_start and i <= ent_tok_end
+                ]
                 if ent_tok_text != doc_tok_text:
                     msg = ('The annotation file and source document disagree '
-                            f'on the tokens for entity {ent.text} (ID: '
+                           f'on the tokens for entity {ent.text} (ID: '
                            f'{ent.ID}). This entity will be dropped.')
                     warnings.warn(msg)
                     self.dropped_ents += 1
@@ -257,9 +261,10 @@
         # Set the list of entities that had token matches as ents for doc
         self.ents = ent_list_tokens
 
-        print(f'Completed character to token conversion for doc {self.doc_key}. '
-            f'{self.dropped_ents} of {self.total_original_ents} entities '
-            'were dropped due to tokenization mismatches.')
+        print(
+            f'Completed character to token conversion for doc {self.doc_key}. '
+            f'{self.dropped_ents} of {self.total_original_ents} entities '
+            'were dropped due to tokenization mismatches.')
 
 
 class Ent:
@@ -379,11 +384,12 @@ def format_bin_rels_dygiepp(rel_list, sent_idx_tups):
         for rel in rel_list:
             # Check to make sure both entities actually have token starts
             if rel.arg1.tok_start == None or rel.arg2.tok_start == None:
-                warnings.warn('Either the start or end token for relation '
-                        f'{rel.arg1.text} -- {rel.label} -- {rel.arg2.text} '
-                        f'(ID: {rel.ID}) was dropped due to tokenization '
-                        'mismatches. This relation will also be dropped '
-                        'as a result.')
+                warnings.warn(
+                    'Either the start or end token for relation '
+                    f'{rel.arg1.text} -- {rel.label} -- {rel.arg2.text} '
+                    f'(ID: {rel.ID}) was dropped due to tokenization '
+                    'mismatches. This relation will also be dropped '
+                    'as a result.')
                 dropped_rels += 1
                 continue
             rel_start = rel.arg1.tok_start
@@ -477,10 +483,11 @@ def format_events_dygiepp(event_list, sent_idx_tups):
             # have token starts
            # First, check the trigger
             if event.trigger.tok_start == None or event.trigger.tok_end == None:
-                warnings.warn(f'The trigger for event ID: {event.ID} '
-                        f'(trigger: {event.trigger.text} was dropped due '
-                        'to tokenization mismatches. This event will be '
-                        'dropped as a result.')
+                warnings.warn(
+                    f'The trigger for event ID: {event.ID} '
+                    f'(trigger: {event.trigger.text}) was dropped due '
+                    'to tokenization mismatches. This event will be '
+                    'dropped as a result.')
                 dropped_events += 1
                 continue
             # Then check all the arguments in the event
@@ -489,9 +496,10 @@
                 if arg_obj.tok_start == None or arg_obj.tok_end == None:
                     any_missing_arg = True
             if any_missing_arg:
-                warnings.warn(f'One or more arguments for event ID: '
-                        f'{event.ID} were dropped due to tokenization mismatches. '
-                        'This event will be dropped as a result.')
+                warnings.warn(
+                    f'One or more arguments for event ID: '
+                    f'{event.ID} were dropped due to tokenization mismatches. '
+                    'This event will be dropped as a result.')
                 dropped_events += 1
                 continue
 
@@ -583,10 +591,12 @@ def format_corefs_dygiepp(equiv_rels_list):
                 if arg.tok_start == None or arg.tok_end == None:
                     any_missing_args = True
             if any_missing_args:
-                warnings.warn('One or more arguments in the coreference '
-                        f'cluster EquivRel ID: {equiv_rel.ID} was dropped '
-                        'Due to entity tokenization mismatches. This '
-                        'coreference will also be dropped as a reult.')
+                arg_texts = [arg.text for arg in equiv_rel.args]
+                warnings.warn(
+                    'One or more arguments in the coreference '
+                    f'cluster {equiv_rel.label, arg_texts} were dropped '
+                    'due to entity tokenization mismatches. This '
+                    'coreference will also be dropped as a result.')
                 dropped_equiv_rels += 1
                 continue
 
diff --git a/scripts/new-dataset/brat_to_input.py b/scripts/new-dataset/brat_to_input.py
index e460697..c8b9eab 100644
--- a/scripts/new-dataset/brat_to_input.py
+++ b/scripts/new-dataset/brat_to_input.py
@@ -46,16 +46,16 @@ def format_annotated_document(fname_pair, dataset_name, nlp, coref):
     res = annotated_doc.format_dygiepp()
 
     # Get the numbers of dropped entities and relations for this document
-    dropped_totals = {'dropped_ents': annotated_doc.dropped_ents,
-                      'total_original_ents': annotated_doc.total_original_ents,
-                      'dropped_rels': annotated_doc.dropped_rels,
-                      'total_original_rels': annotated_doc.total_original_rels,
-                      'dropped_equiv_rels': annotated_doc.dropped_equiv_rels,
-                      'total_original_equiv_rels':
-                      annotated_doc.total_original_equiv_rels,
-                      'dropped_events': annotated_doc.dropped_events,
-                      'total_original_events':
-                      annotated_doc.total_original_events}
+    dropped_totals = {
+        'dropped_ents': annotated_doc.dropped_ents,
+        'total_original_ents': annotated_doc.total_original_ents,
+        'dropped_rels': annotated_doc.dropped_rels,
+        'total_original_rels': annotated_doc.total_original_rels,
+        'dropped_equiv_rels': annotated_doc.dropped_equiv_rels,
+        'total_original_equiv_rels': annotated_doc.total_original_equiv_rels,
+        'dropped_events': annotated_doc.dropped_events,
+        'total_original_events': annotated_doc.total_original_events
+    }
 
     return res, dropped_totals
 
@@ -115,18 +115,28 @@ def format_labeled_dataset(data_directory, output_file, dataset_name,
     paired_files = get_paired_files(all_files)
 
     # Format doc file pairs
-    overall_dropped_totals = {'dropped_ents':0, 'total_original_ents':0,
-        'dropped_rels':0, 'total_original_rels':0, 'dropped_equiv_rels':0,
-        'total_original_equiv_rels':0, 'dropped_events':0,
-        'total_original_events':0}
+    overall_dropped_totals = {
+        'dropped_ents': 0,
+        'total_original_ents': 0,
+        'dropped_rels': 0,
+        'total_original_rels': 0,
+        'dropped_equiv_rels': 0,
+        'total_original_equiv_rels': 0,
+        'dropped_events': 0,
+        'total_original_events': 0
+    }
     res = []
     for fname_pair in paired_files:
-        r, dropped_totals = format_annotated_document(fname_pair, dataset_name, nlp, coref)
+        r, dropped_totals = format_annotated_document(fname_pair, dataset_name,
+                                                      nlp, coref)
         res.append(r)
-        overall_dropped_totals = {k: v + dropped_totals[k] for k, v in
-                overall_dropped_totals.items()}
+        overall_dropped_totals = {
+            k: v + dropped_totals[k]
+            for k, v in overall_dropped_totals.items()
+        }
 
-    print('\n\nCompleted conversion for entire dataset! '
+    print(
+        '\n\nCompleted conversion for entire dataset! '
         f'{overall_dropped_totals["dropped_ents"]} of '
        f'{overall_dropped_totals["total_original_ents"]} original entities '
        'were dropped due to tokenization mismatches. As a result, '
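
Note for reviewers: the ordering constraint called out in the test comments (`set_arg_objects` always runs *before* `char_to_token`) is the crux of this change, since `char_to_token` now drops unalignable entities that the ID-resolution step still needs to see. A minimal usage sketch of the call order baked into `brat_to_input.py`, assuming the `AnnotatedDoc` API shown in this diff; the file paths, dataset name, and spaCy model below are placeholders, not part of this PR:

```python
import spacy

import annotated_doc as ad

# Placeholder inputs; any spaCy pipeline and brat .txt/.ann pair will do.
nlp = spacy.load("en_core_web_sm")

# Parse the brat annotations into Ent/BinRel/Event/EquivRel objects.
doc = ad.AnnotatedDoc.parse_ann("my_doc.txt", "my_doc.ann", nlp,
                                dataset="my_dataset", coref=True)

# Resolve string IDs against the *full* entity list first...
doc.set_annotation_objects()

# ...then drop entities that cannot be aligned to the tokenization.
doc.char_to_token()

# Relations, events, and corefs whose entities were dropped are now
# skipped and counted rather than failing on a missing token index.
res = doc.format_dygiepp()
print(doc.dropped_ents, doc.dropped_rels)
```

Deferring `char_to_token` until after `set_annotation_objects` is what lets dropped entities be detected and tallied per document instead of surfacing later as formatting errors.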