Skip to content

Commit

Permalink
Make Wikicode.matches() handle namespaces.
Browse files Browse the repository at this point in the history
Fixes earwig#302.
  • Loading branch information
roysmith committed Jan 10, 2025
1 parent a1beb77 commit 1223515
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
32 changes: 28 additions & 4 deletions src/mwparserfromhell/wikicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,19 +516,43 @@ def matches(self, other):
adjusted. Specifically, whitespace and markup is stripped and the first
letter's case is normalized. Typical usage is
``if template.name.matches("stub"): ...``.
If either side has any colons, everything before the last colon is taken to be
a namespace and/or interwiki prefix. The parts before and after the colon are
normalized and compared separately; both must match for the result to be True.
"""
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
this = normalize(self.strip_code().strip())
this = self.strip_code().strip()
this_prefix, this_postfix = self._split_and_normalize(this)

if isinstance(other, (str, bytes, Wikicode, Node)):
that = parse_anything(other).strip_code().strip()
return this == normalize(that)
that_prefix, that_postfix = self._split_and_normalize(that)
return (this_prefix, this_postfix) == (that_prefix, that_postfix)

for obj in other:
that = parse_anything(obj).strip_code().strip()
if this == normalize(that):
that_prefix, that_postfix = self._split_and_normalize(that)
if (this_prefix, this_postfix) == (that_prefix, that_postfix):
return True
return False

def _split_and_normalize(self, s):
"""Split a page title into a prefix (everything before the last colon)
and a postfix (everything after the last colon). Both parts are normalized
according to the rules specific to that part (the prefix is case-insensitive,
while the postfix is only case insensitive in the first character) before being
returned.
If there is no prefix, the returned prefix is an empty string.
"""
normalize = lambda s: (s[0].upper() + s[1:]).replace("_", " ") if s else s
m = re.match(r'(.*):(.*)', s)
if m:
return normalize(m[1]).lower(), normalize(m[2])
else:
return "", normalize(s)



def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
"""Iterate over nodes in our list matching certain conditions.
Expand Down
9 changes: 9 additions & 0 deletions tests/test_wikicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ def test_matches():
code3 = parse("Hello world!")
code4 = parse("World,_hello?")
code5 = parse("")
code6 = parse("File:Foo")
code7 = parse("Talk:foo")
assert code1.matches("Cleanup") is True
assert code1.matches("cleanup") is True
assert code1.matches(" cleanup\n") is True
Expand All @@ -386,6 +388,13 @@ def test_matches():
assert code5.matches("") is True
assert code5.matches("<!-- nothing -->") is True
assert code5.matches(("a", "b", "")) is True
assert code6.matches("File:Foo") is True
assert code6.matches("File:foo") is True
assert code6.matches("FILE:FOO") is False
assert code6.matches("file:foo") is True
assert code6.matches("FiLe:foo") is True
assert code6.matches("FiLE:Foo") is True
assert code7.matches("Talk:Foo") is True


def test_filter_family():
Expand Down

0 comments on commit 1223515

Please sign in to comment.