From 90a729133ccb33dec70a2160fb7f2dd13d8ce53c Mon Sep 17 00:00:00 2001
From: Gabor Recski
Date: Mon, 7 Mar 2022 13:06:30 +0100
Subject: [PATCH 1/2] resolved conflict, new rules in text_to_4lang lexicon

---
 README.md                        | 2 +-
 tuw_nlp/grammar/lexicon.py       | 4 ++++
 tuw_nlp/grammar/text_to_4lang.py | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 24fe0a5..4136576 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ python services/text_to_4lang/backend/service.py
 Then run the frontend with this command:
 
 ```
-streamlit run services/text_to_4lang/frontend/extract.py
+streamlit run services/text_to_4lang/frontend/demo.py
 ```
 
 In the demo you can parse English and German sentences and you can also try out the algorithms our graphs implement, such as `expand`, `substitute` and `append_zero_paths`.
diff --git a/tuw_nlp/grammar/lexicon.py b/tuw_nlp/grammar/lexicon.py
index 9f6114e..e64cd77 100644
--- a/tuw_nlp/grammar/lexicon.py
+++ b/tuw_nlp/grammar/lexicon.py
@@ -508,6 +508,10 @@ def get_mod_edges(self):
             ("VERB", "ADVMOD", "ADV"),
             # nicht staffeln, sample 10
             ("VERB", "ADVMOD", "PART"),
+            # nicht mehr als
+            ("CCONJ", "ADVMOD", "ADV"),
+            # nicht mehr als
+            ("ADV", "ADVMOD", "PART"),
             # sample 112 of sample_10
             ("VERB", "ADVCL", "VERB"),
             # nicht gewaehlt... , weil er gegen die Homo-Ehe... (Germeval '18)
diff --git a/tuw_nlp/grammar/text_to_4lang.py b/tuw_nlp/grammar/text_to_4lang.py
index 600a3a9..d730bde 100644
--- a/tuw_nlp/grammar/text_to_4lang.py
+++ b/tuw_nlp/grammar/text_to_4lang.py
@@ -74,6 +74,8 @@ def expand(self, graph, depth=1, substitute=False, expand_set=set(), strategy="N
                     graph, d_node, definition, substitute, strategy)
                 if expand_set:
                     expand_set |= set(definition_nodes)
+            else:
+                print('no definition for ' + node)
 
         self.expand(graph, depth-1, substitute=substitute,
                     expand_set=expand_set, strategy=strategy)

From 32e9c9d08f6a2383ecb74149f570df98c9f62df6 Mon Sep 17 00:00:00 2001
From: Gabor Recski
Date: Fri, 1 Apr 2022 12:08:28 +0200
Subject: [PATCH 2/2] option to disable ssplit in text_to_4lang, raise error if 4lang graph is not connected, some more DE abbrev patterns

---
 tuw_nlp/grammar/text_to_4lang.py |  4 ++--
 tuw_nlp/graph/utils.py           | 15 ++++++++++-----
 tuw_nlp/text/patterns/de.py      |  4 +++-
 tuw_nlp/text/pipeline.py         |  8 ++++----
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/tuw_nlp/grammar/text_to_4lang.py b/tuw_nlp/grammar/text_to_4lang.py
index d730bde..96ebb6a 100644
--- a/tuw_nlp/grammar/text_to_4lang.py
+++ b/tuw_nlp/grammar/text_to_4lang.py
@@ -90,8 +90,8 @@ def parse(self, sen):
         return relabeled_graph, self.graph_lexical.vocab.get_id(
             graph.nodes[root]["name"])
 
-    def __call__(self, text, depth=0, substitute=False, expand_set=set(), strategy="None"):
-        for sen in self.nlp(text).sentences:
+    def __call__(self, text, depth=0, substitute=False, expand_set=set(), strategy="None", ssplit=True):
+        for sen in self.nlp(text, ssplit=ssplit).sentences:
             graph, root = self.parse(sen)
 
             fourlang = FourLang(graph, root, self.graph_lexical)
diff --git a/tuw_nlp/graph/utils.py b/tuw_nlp/graph/utils.py
index a02fc51..2007583 100644
--- a/tuw_nlp/graph/utils.py
+++ b/tuw_nlp/graph/utils.py
@@ -1,3 +1,4 @@
+import logging
 import re
 from copy import deepcopy
 from itertools import chain
@@ -156,7 +157,7 @@ def match(self, graph, return_subgraphs=False):
             for p in patt:
                 matcher = DiGraphMatcher(
                     graph, p, node_match=GraphFormulaMatcher.node_matcher, edge_match=GraphFormulaMatcher.edge_matcher)
-
+                monomorphic_subgraphs = list(matcher.subgraph_monomorphisms_iter())
                 if not len(monomorphic_subgraphs) == 0:
                     mapping = monomorphic_subgraphs[0]
 
@@ -170,9 +171,9 @@ def match(self, graph, return_subgraphs=False):
             if pos_match:
                 if return_subgraphs:
                     yield key, i, subgraphs
-                else: 
+                else:
                     yield key, i
-    
+
 
 def gen_subgraphs(M, no_edges):
     """M must be dict of dicts, see networkx.convert.to_dict_of_dicts.
@@ -263,9 +264,13 @@ def graph_to_pn(graph):
 
     G = pn.Graph(pn_nodes + pn_edges)
 
+    try:
     # two spaces before edge name, because alto does it :)
-    return pn.encode(G, indent=0).replace('\n', '  ')
-
+        return pn.encode(G, indent=0).replace('\n', '  ')
+    except pn.exceptions.LayoutError as e:
+        words = [graph.nodes[node]['name'] for node in graph.nodes()]
+        logging.error(f'pn.encode failed on this graph: {words}')
+        raise e
 
 def read_alto_output(raw_dl):
     id_to_word = {}
diff --git a/tuw_nlp/text/patterns/de.py b/tuw_nlp/text/patterns/de.py
index b9acad7..1aa22aa 100644
--- a/tuw_nlp/text/patterns/de.py
+++ b/tuw_nlp/text/patterns/de.py
@@ -6,7 +6,9 @@
     "Kat.G.",
     "lit.",
     "ONr.",
-    'bzw.'
+    'bzw.',
+    'Pkt.',
+    "Dipl.-Ing."
 ]
 
 MONTH = [
diff --git a/tuw_nlp/text/pipeline.py b/tuw_nlp/text/pipeline.py
index cd1bcae..355d20d 100644
--- a/tuw_nlp/text/pipeline.py
+++ b/tuw_nlp/text/pipeline.py
@@ -46,15 +46,15 @@ def __init__(self, stanza_pipeline, cache_path, init=None):
 
         self.changed = False
 
-    def parse(self, text):
+    def parse(self, text, ssplit):
         if self.nlp is None:
             self.nlp = self.init()
-        return self.nlp(text)
+        return self.nlp(text) if ssplit else self.nlp.additional(text)
 
-    def __call__(self, text):
+    def __call__(self, text, ssplit=True):
         if text not in self.parsed:
-            self.parsed[text] = self.parse(text)
+            self.parsed[text] = self.parse(text, ssplit=ssplit)
             self.changed = True
 
         return self.parsed[text]
 
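
The ssplit flag added in patch 2 is threaded from `TextTo4lang.__call__` through `CachedStanzaPipeline.__call__` into `parse`, which selects between the wrapped pipeline's two entry points (`self.nlp(text)` vs. `self.nlp.additional(text)`). Below is a minimal, self-contained sketch of that pattern; `DummyPipeline` and `CachedPipeline` are hypothetical stand-ins for illustration, not tuw_nlp code:

```python
class DummyPipeline:
    """Hypothetical stand-in for the wrapped stanza pipeline."""

    def __call__(self, text):
        # normal mode: split the text into sentences
        return [s.strip() for s in text.split('.') if s.strip()]

    def additional(self, text):
        # no-ssplit mode: treat the whole input as one sentence
        return [text.strip()]


class CachedPipeline:
    """Sketch of the caching pattern the patch extends."""

    def __init__(self, nlp):
        self.nlp = nlp
        self.parsed = {}

    def parse(self, text, ssplit):
        return self.nlp(text) if ssplit else self.nlp.additional(text)

    def __call__(self, text, ssplit=True):
        # As in the patch, the cache key is the text alone, so a text
        # first parsed with one ssplit setting is served from the cache
        # even if later requested with the other.
        if text not in self.parsed:
            self.parsed[text] = self.parse(text, ssplit=ssplit)
        return self.parsed[text]


nlp = CachedPipeline(DummyPipeline())
print(nlp("Dogs bark. Cats meow."))                # ['Dogs bark', 'Cats meow']
print(nlp("Dogs bark. Cats meow.", ssplit=False))  # cache hit: same two sentences
```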
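
The try/except added to `graph_to_pn` in `tuw_nlp/graph/utils.py` guards the penman encoding step: `pn.encode` raises a `LayoutError` when asked to serialize a disconnected graph, and the patch logs the graph's node names before re-raising. A minimal sketch of that failure mode, assuming only networkx and penman; the `u_{i}` variable naming and node names are illustrative, not the exact triples `graph_to_pn` builds:

```python
import logging

import networkx as nx
import penman as pn

# Two nodes with no edge between them: a disconnected graph.
g = nx.DiGraph()
g.add_node(0, name='dog')
g.add_node(1, name='bark')

pn_nodes = [(f'u_{i}', ':instance', data['name'])
            for i, data in g.nodes(data=True)]
pn_edges = []  # no edges, so 'u_1' is unreachable from the top node

G = pn.Graph(pn_nodes + pn_edges)
try:
    print(pn.encode(G, indent=0))
except pn.exceptions.LayoutError as e:
    # same logging-then-reraise pattern as the patched graph_to_pn
    words = [g.nodes[node]['name'] for node in g.nodes()]
    logging.error(f'pn.encode failed on this graph: {words}')
    raise e
```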