Merge branch 'main' of https://github.com/recski/tuw-nlp into main
adaamko committed Apr 12, 2022
2 parents 8decbbe + 31e1527 commit 714ef5a
Showing 6 changed files with 26 additions and 13 deletions.
README.md: 2 changes (1 addition, 1 deletion)
@@ -74,7 +74,7 @@ python services/text_to_4lang/backend/service.py
Then run the frontend with this command:

```
streamlit run services/text_to_4lang/frontend/extract.py
streamlit run services/text_to_4lang/frontend/demo.py
```

In the demo you can parse English and German sentences and also try out several algorithms implemented on our graphs, such as `expand`, `substitute` and `append_zero_paths`.
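
The `expand` and `substitute` options mentioned above can also be tried outside the demo UI. The sketch below is an assumption about the programmatic API: the `TextTo4lang` constructor arguments and the exact return value of a call (assumed here to yield one 4lang graph per sentence) are not shown in this commit.

```
# Sketch only: the constructor arguments ("en", "en_nlp_cache") are assumed,
# not taken from this commit.
from tuw_nlp.grammar.text_to_4lang import TextTo4lang

tfl = TextTo4lang("en", "en_nlp_cache")

# depth controls how many rounds of definition expansion are applied;
# substitute=True is assumed to replace defined nodes with their definitions
# instead of attaching them.
for fourlang in tfl("Dogs bark loudly.", depth=1, substitute=False):
    print(fourlang)
```
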
tuw_nlp/grammar/lexicon.py: 4 changes (4 additions, 0 deletions)
@@ -508,6 +508,10 @@ def get_mod_edges(self):
("VERB", "ADVMOD", "ADV"),
# nicht staffeln, sample 10
("VERB", "ADVMOD", "PART"),
# nicht mehr als
("CCONJ", "ADVMOD", "ADV"),
# nicht mehr als
("ADV", "ADVMOD", "PART"),
# sample 112 of sample_10
("VERB", "ADVCL", "VERB"),
# nicht gewaehlt... , weil er gegen die Homo-Ehe... (Germeval '18)
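
The new lexicon entries are (head POS, dependency relation, dependent POS) triples. As a tiny illustration of what the second entry covers: in "nicht mehr als", "nicht" (PART) attaches to "mehr" (ADV) via an ADVMOD edge. The lookup function below is a sketch for illustration only, not code from the repository.

```
# Sketch only: shows what the new (head POS, relation, dependent POS) entries encode.
mod_edges = {
    ("CCONJ", "ADVMOD", "ADV"),  # nicht mehr als
    ("ADV", "ADVMOD", "PART"),   # nicht mehr als
}

def is_mod_edge(head_pos, deprel, dep_pos):
    # Dependency relations from the parser are lowercase; the lexicon stores them uppercase.
    return (head_pos, deprel.upper(), dep_pos) in mod_edges

print(is_mod_edge("ADV", "advmod", "PART"))  # True: "nicht" modifying "mehr"
```
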
tuw_nlp/grammar/text_to_4lang.py: 6 changes (4 additions, 2 deletions)
@@ -74,6 +74,8 @@ def expand(self, graph, depth=1, substitute=False, expand_set=set(), strategy="N
graph, d_node, definition, substitute, strategy)
if expand_set:
expand_set |= set(definition_nodes)
else:
print('no definition for ' + node)

self.expand(graph, depth-1, substitute=substitute,
expand_set=expand_set, strategy=strategy)
@@ -88,8 +90,8 @@ def parse(self, sen):
return relabeled_graph, self.graph_lexical.vocab.get_id(
graph.nodes[root]["name"])

def __call__(self, text, depth=0, substitute=False, expand_set=set(), strategy="None"):
for sen in self.nlp(text).sentences:
def __call__(self, text, depth=0, substitute=False, expand_set=set(), strategy="None", ssplit=True):
for sen in self.nlp(text, ssplit=ssplit).sentences:
graph, root = self.parse(sen)

fourlang = FourLang(graph, root, self.graph_lexical)
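
The new `ssplit` flag on `__call__` is forwarded to the cached NLP pipeline (see `tuw_nlp/text/pipeline.py` below). A brief sketch of the intended call follows; the constructor arguments are assumed, as in the earlier sketch.

```
# Sketch only: constructor arguments are assumed, not taken from this commit.
from tuw_nlp.grammar.text_to_4lang import TextTo4lang

tfl = TextTo4lang("en", "en_nlp_cache")

# Default: the pipeline splits the input into sentences before parsing.
graphs = list(tfl("First sentence. Second sentence.", depth=0))

# ssplit=False is passed through to the pipeline, which is then expected
# to process the text without re-splitting it (see the pipeline change below).
graphs_single = list(tfl("First sentence. Second sentence.", depth=0, ssplit=False))
```
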
tuw_nlp/graph/utils.py: 15 changes (10 additions, 5 deletions)
@@ -1,3 +1,4 @@
import logging
import re
from copy import deepcopy
from itertools import chain
@@ -156,7 +157,7 @@ def match(self, graph, return_subgraphs=False):
for p in patt:
matcher = DiGraphMatcher(
graph, p, node_match=GraphFormulaMatcher.node_matcher, edge_match=GraphFormulaMatcher.edge_matcher)

monomorphic_subgraphs = list(matcher.subgraph_monomorphisms_iter())
if not len(monomorphic_subgraphs) == 0:
mapping = monomorphic_subgraphs[0]
@@ -170,9 +171,9 @@ if pos_match:
if pos_match:
if return_subgraphs:
yield key, i, subgraphs
else:
else:
yield key, i


def gen_subgraphs(M, no_edges):
"""M must be dict of dicts, see networkx.convert.to_dict_of_dicts.
@@ -263,9 +264,13 @@ def graph_to_pn(graph):

G = pn.Graph(pn_nodes + pn_edges)

try:
# two spaces before edge name, because alto does it :)
return pn.encode(G, indent=0).replace('\n', ' ')

return pn.encode(G, indent=0).replace('\n', ' ')
except pn.exceptions.LayoutError as e:
words = [graph.nodes[node]['name'] for node in graph.nodes()]
logging.error(f'pn.encode failed on this graph: {words}')
raise e

def read_alto_output(raw_dl):
id_to_word = {}
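
The change above wraps `pn.encode` in a try/except so that graphs the penman library cannot lay out are logged before the error is re-raised. Below is a standalone sketch of the same pattern; the example triples are invented, and the log message uses the graph's triples instead of the networkx node names used in the repository code.

```
# Standalone sketch of the error-handling pattern; example triples are invented.
import logging

import penman as pn

pn_nodes = [("u_1", ":instance", "bark"), ("u_2", ":instance", "dog")]
pn_edges = [("u_1", ":1", "u_2")]
G = pn.Graph(pn_nodes + pn_edges)

try:
    # two spaces before edge name, because alto does it :)
    print(pn.encode(G, indent=0).replace("\n", " "))
except pn.exceptions.LayoutError as e:
    logging.error(f"pn.encode failed on this graph: {G.triples}")
    raise e
```
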
tuw_nlp/text/patterns/de.py: 4 changes (3 additions, 1 deletion)
@@ -6,7 +6,9 @@
"Kat.G.",
"lit.",
"ONr.",
'bzw.'
'bzw.',
'Pkt.',
"Dipl.-Ing."
]

MONTH = [
tuw_nlp/text/pipeline.py: 8 changes (4 additions, 4 deletions)
@@ -46,15 +46,15 @@ def __init__(self, stanza_pipeline, cache_path, init=None):

self.changed = False

def parse(self, text):
def parse(self, text, ssplit):
if self.nlp is None:
self.nlp = self.init()

return self.nlp(text)
return self.nlp(text) if ssplit else self.nlp.additional(text)

def __call__(self, text):
def __call__(self, text, ssplit=True):
if text not in self.parsed:
self.parsed[text] = self.parse(text)
self.parsed[text] = self.parse(text, ssplit=ssplit)
self.changed = True

return self.parsed[text]
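
The cache's `__call__` now accepts an `ssplit` flag and routes `ssplit=False` through the wrapped pipeline's `additional` method instead of calling it directly. The sketch below exercises that routing with a stand-in pipeline object; the `CachedStanzaPipeline` name, its cache-file handling, and the stand-in class are assumptions made for illustration.

```
# Sketch only: DummyPipeline and the cache path are invented; the class name
# CachedStanzaPipeline and its constructor arguments are assumed from this diff's context.
from tuw_nlp.text.pipeline import CachedStanzaPipeline

class DummyPipeline:
    """Stand-in for the stanza pipeline that the cache normally wraps."""

    def __call__(self, text):
        return f"parsed with sentence splitting: {text}"

    def additional(self, text):
        return f"parsed as a single unit: {text}"

nlp = CachedStanzaPipeline(DummyPipeline(), "cache/dummy.json")

print(nlp("Erster Satz. Zweiter Satz."))                   # default ssplit=True, uses __call__
print(nlp("Noch ein Satz ohne Splitting.", ssplit=False))  # routed through .additional()

# Note: the cache in __call__ is keyed on the text alone, so repeating the same
# text with a different ssplit value would be served from the cache.
```
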
