Skip to content

Commit

Permalink
#11 Achieve order of magnitude speedup of parser
Browse files Browse the repository at this point in the history
- use faster pyparsing grammar constructs and
  arrangements to significantly speed up parsing
- enable skipped parsing unit tests now that they
  are not infeasibly slow
- add testing requirements to setup.py

Anecdotally, the time to run the `test_date_values` tests
dropped from about 30 seconds to under 3 seconds.

See also #17
  • Loading branch information
jmurty committed Jun 6, 2017
1 parent db205ac commit e713c06
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 71 deletions.
144 changes: 75 additions & 69 deletions edtf/parser/grammar.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,47 @@
from pyparsing import Literal as L, ParseException, Optional, OneOrMore, \
ZeroOrMore, Regex, Or, Combine
ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums

# (* ************************** Level 0 *************************** *)
from parser_classes import Date, DateAndTime, Interval, UncertainOrApproximate, \
Unspecified, Level1Interval, LongYear, Season, \
from parser_classes import Date, DateAndTime, Interval, Unspecified, \
UncertainOrApproximate, Level1Interval, LongYear, Season, \
PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \
Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \
MaskedPrecision, Level2Interval, ExponentialYear

from edtf_exceptions import EDTFParseException

oneThru12 = L("01") ^ "02" ^ "03" ^ "04" ^ "05" ^ "06" ^ "07" ^ "08" ^ \
"09" ^ "10" ^ "11" ^ "12"
oneThru13 = oneThru12 ^ "13"
oneThru23 = oneThru13 ^ "14" ^ "15" ^ "16" ^ "17" ^ "18" ^ "19" ^ "20" ^ \
"21" ^ "22" ^ "23"
zeroThru23 = L("00") ^ oneThru23
oneThru29 = oneThru23 ^ "24" ^ "25" ^ "26" ^ "27" ^ "28" ^ "29"
oneThru30 = oneThru29 ^ "30"
oneThru31 = oneThru30 ^ "31"
oneThru59 = oneThru31 ^ "32" ^ "33" ^ "34" ^ "35" ^ "36" ^ "37" ^ "38" ^ \
"39" ^ "40" ^ "41" ^ "42" ^ "43" ^ "44" ^ "45" ^ "46" ^ "47" ^ "48" ^ \
"49" ^ "50" ^ "51" ^ "52" ^ "53" ^ "54" ^ "55" ^ "56" ^ "57" ^ "58" ^ "59"
zeroThru59 = L("00") ^ oneThru59

positiveDigit = L("1") ^ "2" ^ "3" ^ "4" ^ "5" ^ "6" ^ "7" ^ "8" ^ "9"
digit = positiveDigit ^ "0"
# Zero-padded, two-digit number ranges built with oneOf() for fast matching.
# range() excludes its stop value, so each stop below is one past the
# largest number the expression must accept.
oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)])
oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)])
oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)])
# Must include "23" (stop value 24); a stop of 23 would reject hour 23.
zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)])
oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)])
oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)])
oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)])
oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)])
zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)])

positiveDigit = Word(nums, exact=1, excludeChars='0')
digit = Word(nums, exact=1)

second = zeroThru59
minute = zeroThru59
hour = zeroThru23
day = oneThru31("day")

month = oneThru12("month")
monthDay = \
(
(L("01") ^ "03" ^ "05" ^ "07" ^ "08" ^ "10" ^ "12")("month") + "-"
+ oneThru31("day")
) \
^ ((L("04") ^ "06" ^ "09" ^ "11")("month") + "-" + oneThru30("day")) \
^ (L("02")("month") + "-" + oneThru29("day"))
monthDay = (
(oneOf("01 03 05 07 08 10 12")("month") + "-" + oneThru31("day")) ^
(oneOf("04 06 09 11")("month") + "-" + oneThru30("day")) ^
(L("02")("month") + "-" + oneThru29("day"))
)

positiveYear = (
(positiveDigit + digit + digit + digit)
^ (digit + positiveDigit + digit + digit)
^ (digit + digit + positiveDigit + digit)
^ (digit + digit + digit + positiveDigit)
) #4 digits, at least one of which is positive
# Exactly 4 digits, each 0 to 9 (this includes "0000")
positiveYear = Word(nums, exact=4)

negativeYear = ("-" + positiveYear)
# Negative version of positive year, but "-0000" is illegal
negativeYear = NotAny(L("-0000")) + ("-" + positiveYear)

year = Combine(positiveYear ^ negativeYear ^ L("0000"))("year")
year = Combine(positiveYear ^ negativeYear)("year")

yearMonth = year + "-" + month
yearMonthDay = year + "-" + monthDay # o hai iso date
Expand All @@ -59,14 +50,13 @@
Date.set_parser(date)

zoneOffsetHour = oneThru13
zoneOffset = L("Z") ^ \
(
(L("+") ^ "-") + (
(zoneOffsetHour + Optional(":" + minute)) ^
"14:00" ^
("00:" + oneThru59)
)
zoneOffset = L("Z") ^ (
Regex("[+-]") + (
zoneOffsetHour + Optional(":" + minute) ^
L("14:00") ^
("00:" + oneThru59)
)
)

baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00")

Expand All @@ -84,10 +74,10 @@
# (* ************************** Level 1 *************************** *)

# (* ** Auxiliary Assignments for Level 1 ** *)
UASymbol = Combine(L("?") ^ L("~") ^ L("?~"))
UASymbol = Combine(oneOf("? ~ ?~"))
UA.set_parser(UASymbol)

seasonNumber = L("21") ^ "22" ^ "23" ^ "24"
seasonNumber = oneOf("21 22 23 24")

# (* *** Season (unqualified) *** *)
season = year + "-" + seasonNumber("season")
Expand Down Expand Up @@ -120,10 +110,12 @@ def f(toks):
Level1Interval.set_parser(level1Interval)

# (* *** unspecified *** *)
yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ 'u') + 'u')("year")
monthUnspecified = year + "-" + Combine("uu")("month")
dayUnspecified = yearMonth + "-" + Combine("uu")("day")
dayAndMonthUnspecified = year + "-" + Combine("uu")("month") + "-" + Combine("uu")("day")
yearWithOneOrTwoUnspecifedDigits = Combine(
digit + digit + (digit ^ 'u') + 'u'
)("year")
monthUnspecified = year + "-" + L("uu")("month")
dayUnspecified = yearMonth + "-" + L("uu")("day")
dayAndMonthUnspecified = year + "-" + L("uu")("month") + "-" + L("uu")("day")

unspecified = yearWithOneOrTwoUnspecifedDigits \
^ monthUnspecified \
Expand All @@ -146,29 +138,43 @@ def f(toks):

# (* ** Internal Unspecified** *)

positiveDigitOrU = positiveDigit ^ "u"
digitOrU = positiveDigitOrU ^ "0"
oneThru3 = L("1") ^ "2" ^ "3"

dayWithU = oneThru31 \
^ ("u" + digitOrU) \
^ (oneThru3 + "u")

monthWithU = oneThru12 ^ "0u" ^ "1u" ^ ("u" + digitOrU)

yearWithU = (L("u") + digitOrU + digitOrU + digitOrU) \
^ (digitOrU + "u" + digitOrU + digitOrU) \
^ (digitOrU + digitOrU + "u" + digitOrU) \
^ (digitOrU + digitOrU + digitOrU + "u")

yearMonthWithU = (Combine(year("") ^ yearWithU)("year") + "-" + monthWithU("month")) \
^ (yearWithU("year") + "-" + month)

monthDayWithU = (Combine(month("") ^ monthWithU)("month") + "-" + Combine(dayWithU)("day")) \
^ (monthWithU("month") + "-" + day)
digitOrU = Word(nums + 'u', exact=1)

# 2-digit day with at least one 'u' present
dayWithU = Combine(
("u" + digitOrU) ^
(digitOrU + 'u')
)("day")

# 2-digit month with at least one 'u' present
monthWithU = Combine(
oneOf("0u 1u") ^
("u" + digitOrU)
)("month")

# 4-digit year with at least one 'u' present
yearWithU = Combine(
('u' + digitOrU + digitOrU + digitOrU) ^
(digitOrU + 'u' + digitOrU + digitOrU) ^
(digitOrU + digitOrU + 'u' + digitOrU) ^
(digitOrU + digitOrU + digitOrU + 'u')
)("year")

yearMonthDayWithU = (Combine(yearWithU ^ year(""))("year") + "-" + monthDayWithU) \
^ (yearWithU("year") + "-" + monthDay)
yearMonthWithU = (
(Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^
(yearWithU + "-" + month)
)

monthDayWithU = (
(Combine(month("") ^ monthWithU)("month") + "-" + dayWithU) ^
(monthWithU + "-" + day)
)

yearMonthDayWithU = (
(yearWithU + "-" + Combine(month("") ^ monthWithU)("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) ^
(year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) ^
(year + "-" + month + "-" + dayWithU)
)

partialUnspecified = yearWithU ^ yearMonthWithU ^ yearMonthDayWithU
PartialUnspecified.set_parser(partialUnspecified)
Expand Down
3 changes: 1 addition & 2 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,15 @@
'y17e7-12-26', # not implemented
'2016-13-08', # wrong day order
'2016-02-39', # out of range
'-0000-01-01', # negative zero year
)

class TestParsing(unittest.TestCase):


def test_non_parsing(self):
for i in BAD_EXAMPLES:
self.assertRaises(EDTFParseException, parse, i)

@unittest.skip("takes a long time")
def test_date_values(self):
"""
Test that every EDTFObject can tell you its lower and upper
Expand Down
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def readme():
'python-dateutil',
'pyparsing',
],
extras_require={
'test': [
'django',
'nose',
],
},
setup_requires=[
'setuptools_scm',
],
Expand Down

0 comments on commit e713c06

Please sign in to comment.