From 134eb73d8c53a71e6cd6f34b17a91266d69c88e1 Mon Sep 17 00:00:00 2001 From: keithsterling Date: Fri, 24 Mar 2017 16:48:49 +0000 Subject: [PATCH] Fixed issue with spaces between punctuation, and stop parser crashing for invalid xml, now reports issue with file --- src/programy/parser/aiml_parser.py | 31 ++++++++-------- src/programy/processors/post/cleanup.py | 18 +++++----- src/programy/utils/files/filefinder.py | 8 ++++- .../aiml_tests/train_tests/test_now_ask_me.py | 2 +- .../aiml_tests/train_tests/test_train_aiml.py | 2 +- src/test/parser/invalid.aiml | 9 +++++ src/test/parser/test_aiml_parser.py | 9 +++++ src/test/parser/valid.aiml | 35 +++++++++++++++++++ 8 files changed, 84 insertions(+), 30 deletions(-) create mode 100644 src/test/parser/invalid.aiml create mode 100644 src/test/parser/valid.aiml diff --git a/src/programy/parser/aiml_parser.py b/src/programy/parser/aiml_parser.py index 6bf86ffe3..223db7d8d 100644 --- a/src/programy/parser/aiml_parser.py +++ b/src/programy/parser/aiml_parser.py @@ -32,9 +32,10 @@ def __init__(self, aiml_parser): self.aiml_parser = aiml_parser def load_file_contents(self, filename): - logging.info(filename) - return self.aiml_parser.parse_from_file(filename) - + try: + return self.aiml_parser.parse_from_file(filename) + except Exception as e: + logging.exception("Failed to load contents of file from [%s]"%filename, e) class AIMLParser(object): def __init__(self, supress_warnings=False, stop_on_invalid=False): @@ -75,20 +76,16 @@ def parse_from_file(self, filename): logging.info("Loading aiml file file: " + self._filename) - tree = ET.parse(filename, parser=LineNumberingParser()) - aiml = tree.getroot() - - if aiml is None or aiml.tag != 'aiml': - raise ParserException("Error, root tag is not ", filename=filename) - else: - try: + try: + tree = ET.parse(filename, parser=LineNumberingParser()) + aiml = tree.getroot() + if aiml is None or aiml.tag != 'aiml': + raise ParserException("Error, root tag is not ", filename=filename) + else: self.parse_aiml(aiml, filename) - except ParserException as parser_excep: - parser_excep.filename = filename - raise parser_excep - except ET.ParseError as xmlpe: - xmlpe.filename = filename - xmlpe.xml_exception = xmlpe + except Exception as e: + logging.error("Failed to load contents of AIML file from [%s] - [%s]"%(filename, e)) + def parse_from_text(self, text): """ @@ -100,7 +97,7 @@ def parse_from_text(self, text): aiml = ET.fromstring(text) if aiml is None or aiml.tag != 'aiml': - ParserException("Error, root tag is not ", filename="text") + raise ParserException("Error, root tag is not ", filename="text") else: self.parse_aiml(aiml, "text") diff --git a/src/programy/processors/post/cleanup.py b/src/programy/processors/post/cleanup.py index d88098cd9..f9bdf693f 100644 --- a/src/programy/processors/post/cleanup.py +++ b/src/programy/processors/post/cleanup.py @@ -16,6 +16,8 @@ import logging +import re + from programy.processors.processing import PostProcessor class CleanUpPostProcessor(PostProcessor): @@ -24,13 +26,9 @@ def __init__(self): def process(self, bot, clientid, string): logging.debug("Cleaning up output...") - stripped = string.strip() - if stripped.endswith(" ."): - stripped = stripped[:len(stripped)-2] + "." - return stripped - - # - #first = stripped[:1] - #rest = stripped[1:] - #result = first.upper() + rest.lower() - #return result + + pass1 = re.split(r"""("[^"]*"|'[^']*')""", string) + pass2 = [val.strip() for val in pass1] + pass3 = " ".join(re.sub(r'("\s+)(.*)(\s+")', r'"\2"', val) for val in pass2) + pass4 = re.sub(r'\s+([,:;?.!](?:\s|$))', r'\1', pass3) + return pass4 diff --git a/src/programy/utils/files/filefinder.py b/src/programy/utils/files/filefinder.py index 9fbec2319..3a2587e88 100644 --- a/src/programy/utils/files/filefinder.py +++ b/src/programy/utils/files/filefinder.py @@ -14,7 +14,9 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +import logging import os + from abc import ABCMeta, abstractmethod class FileFinder(object): @@ -54,6 +56,10 @@ def load_dir_contents(self, path_to_sets, subdir=False, extension=".txt"): else: filename = file[0] filename = filename.upper() - collection[filename] = self.load_file_contents(file[1]) + + try: + collection[filename] = self.load_file_contents(file[1]) + except Exception as e: + logging.error ("Failed to load file contents for file [%s]"%file[1]) return collection diff --git a/src/test/aiml_tests/train_tests/test_now_ask_me.py b/src/test/aiml_tests/train_tests/test_now_ask_me.py index fb58e89df..629a45fe9 100644 --- a/src/test/aiml_tests/train_tests/test_now_ask_me.py +++ b/src/test/aiml_tests/train_tests/test_now_ask_me.py @@ -23,4 +23,4 @@ def test_now_ask_me(self): response = TrainAIMLTests.test_client.bot.ask_question("test", "daddy is great") self.assertIsNotNone(response) #TODO Sort out space in questions - self.assertEqual("Now you can ask me: \" Who IS GREAT \"? and \" What does my DADDY BE \"?", response) + self.assertEqual('Now you can ask me: "Who IS GREAT"? and "What does my DADDY BE"?', response) diff --git a/src/test/aiml_tests/train_tests/test_train_aiml.py b/src/test/aiml_tests/train_tests/test_train_aiml.py index b6ac5150d..9a37db283 100644 --- a/src/test/aiml_tests/train_tests/test_train_aiml.py +++ b/src/test/aiml_tests/train_tests/test_train_aiml.py @@ -54,7 +54,7 @@ def test_train_pronoun(self): self.assertIsNotNone(response) #TODO Fix the spacing in quotes - self.assertEqual("Now you can ask me: \" Who LIKES TO SMOKE CIGARS \"? and \" What does my MOMMY LIKE \"?", response) + self.assertEqual('Now you can ask me: "Who LIKES TO SMOKE CIGARS"? and "What does my MOMMY LIKE"?', response) response = TrainAIMLTests.test_client.bot.ask_question("test", "who likes to smoke cigars") self.assertIsNotNone(response) diff --git a/src/test/parser/invalid.aiml b/src/test/parser/invalid.aiml new file mode 100644 index 000000000..af18c1232 --- /dev/null +++ b/src/test/parser/invalid.aiml @@ -0,0 +1,9 @@ + + + + + * + + + + \ No newline at end of file diff --git a/src/test/parser/test_aiml_parser.py b/src/test/parser/test_aiml_parser.py index 69222050e..6785c0e9f 100644 --- a/src/test/parser/test_aiml_parser.py +++ b/src/test/parser/test_aiml_parser.py @@ -1,4 +1,5 @@ import unittest +import os from xml.etree.ElementTree import ParseError from programy.parser.aiml_parser import AIMLParser @@ -18,6 +19,14 @@ def setUp(self): self.parser = AIMLParser(supress_warnings=True, stop_on_invalid=True) self.assertIsNotNone(self.parser) + def test_parse_from_file_valid(self): + filename = os.path.dirname(__file__)+ '/valid.aiml' + self.parser.parse_from_file(filename) + + def test_parse_from_file_invalid(self): + filename = os.path.dirname(__file__)+ '/invalid.aiml' + self.parser.parse_from_file(filename) + def test_crud(self): with self.assertRaises(ParseError) as raised: self.parser.parse_from_text( diff --git a/src/test/parser/valid.aiml b/src/test/parser/valid.aiml new file mode 100644 index 000000000..9acf66bbe --- /dev/null +++ b/src/test/parser/valid.aiml @@ -0,0 +1,35 @@ + + + + + + + + + + + + HELLO + + + + + \ No newline at end of file