From f3b713fc5a45dc7837948b4a85c13d7f9e4cd168 Mon Sep 17 00:00:00 2001 From: jhan Date: Mon, 12 Apr 2021 23:31:13 +0200 Subject: [PATCH 1/9] Fixing cpe data split --- repology/parsers/parsers/gentoo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index e6231a352..e402673d3 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -16,6 +16,7 @@ # along with repology. If not, see . import os +import re import xml.etree.ElementTree from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Set, Tuple @@ -180,7 +181,7 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra pkg.add_maintainers(xml_metadata.maintainers) if xml_metadata.cpe is not None: - cpe = xml_metadata.cpe.split(':') + cpe = re.split(r"(? Date: Tue, 13 Apr 2021 00:05:21 +0200 Subject: [PATCH 2/9] Replacing double quotes --- repology/parsers/parsers/gentoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index e402673d3..69ce811d4 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -181,7 +181,7 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra pkg.add_maintainers(xml_metadata.maintainers) if xml_metadata.cpe is not None: - cpe = re.split(r"(? Date: Tue, 13 Apr 2021 19:19:32 +0200 Subject: [PATCH 3/9] Adding cpe_parse function --- repology/parsers/cpe.py | 36 ++++++++++++++++++++++++ repology/parsers/parsers/gentoo.py | 11 ++++++-- repology/test/test_cpe.py | 44 ++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 repology/parsers/cpe.py create mode 100755 repology/test/test_cpe.py diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py new file mode 100644 index 000000000..14a6f398e --- /dev/null +++ b/repology/parsers/cpe.py @@ -0,0 +1,36 @@ +# Copyright (C) 2019 Dmitry Marakasov +# +# This file is part of repology +# +# repology is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# repology is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with repology. If not, see . + +def cpe_parse(cpe_str: str) -> []: + escaped = False + current = '' + res = [] + + for char in cpe_str: + if escaped: + current += '\\' + char + escaped = False + elif char == '\\': + escaped = True + elif char == ':': + res.append(current) + current = '' + else: + current += char + + res.append(current) + return res diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index 69ce811d4..09535ff8e 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -16,7 +16,6 @@ # along with repology. If not, see . import os -import re import xml.etree.ElementTree from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Set, Tuple @@ -25,6 +24,7 @@ from repology.package import PackageFlags from repology.packagemaker import NameType, PackageFactory, PackageMaker from repology.parsers import Parser +from repology.parsers.cpe import cpe_parse from repology.parsers.maintainers import extract_maintainers from repology.parsers.versions import VersionStripper from repology.transformer import PackageTransformer @@ -181,8 +181,13 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra pkg.add_maintainers(xml_metadata.maintainers) if xml_metadata.cpe is not None: - cpe = re.split(r'(? +# +# This file is part of repology +# +# repology is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# repology is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with repology. If not, see . + +# mypy: no-disallow-untyped-calls + +import unittest + +from repology.parsers.cpe import cpe_parse + + +class TestCpe(unittest.TestCase): + def test_cpe_parse(self) -> None: + self.assertEqual(cpe_parse('foo:bar'), ['foo', 'bar']) + self.assertEqual(cpe_parse('foobar'), ['foobar']) + self.assertEqual(cpe_parse('cpe:2.3:a:andreas_mueller:cdrdao'), ['cpe', '2.3', 'a', 'andreas_mueller', 'cdrdao']) + self.assertEqual(cpe_parse('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar'), ['cpe', '/a', 'archive\\:\\:tar_project', 'archive\\:\\:tar']) + self.assertEqual(cpe_parse('foo\\:bar'), ['foo\\:bar']) + self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo\\\\', 'bar']) + + @unittest.expectedFailure + def test_cpe_parse_failure(self) -> None: + self.assertEqual(cpe_parse('a:b'), ['ab']) + self.assertEqual(cpe_parse('foo\\:bar'), ['foo:bar']) + self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo', 'bar']) + + +if __name__ == '__main__': + unittest.main() From 583519f03537aa7be5d10df1540a8e118eaa398f Mon Sep 17 00:00:00 2001 From: jhan Date: Tue, 13 Apr 2021 19:35:24 +0200 Subject: [PATCH 4/9] Fixing expression --- repology/parsers/cpe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py index 14a6f398e..d38934e91 100644 --- a/repology/parsers/cpe.py +++ b/repology/parsers/cpe.py @@ -15,7 +15,7 @@ # You should have received a copy of the GNU General Public License # along with repology. If not, see . -def cpe_parse(cpe_str: str) -> []: +def cpe_parse(cpe_str: str) -> list[...]: escaped = False current = '' res = [] From 5f00c0d04254b2bdd3dc5f57a363317e30587193 Mon Sep 17 00:00:00 2001 From: sicota Date: Tue, 13 Apr 2021 19:53:57 +0200 Subject: [PATCH 5/9] Adding List --- repology/parsers/cpe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py index d38934e91..4ef663623 100644 --- a/repology/parsers/cpe.py +++ b/repology/parsers/cpe.py @@ -15,7 +15,10 @@ # You should have received a copy of the GNU General Public License # along with repology. If not, see . -def cpe_parse(cpe_str: str) -> list[...]: +from typing import List + + +def cpe_parse(cpe_str: str) -> List[int]: escaped = False current = '' res = [] From 0226995adf2f6dad98fa1ac65b0d944b7f6e9cb1 Mon Sep 17 00:00:00 2001 From: sicota Date: Tue, 13 Apr 2021 20:13:34 +0200 Subject: [PATCH 6/9] Satisfy linter --- repology/parsers/cpe.py | 2 +- repology/parsers/parsers/gentoo.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py index 4ef663623..bf3a1c0c0 100644 --- a/repology/parsers/cpe.py +++ b/repology/parsers/cpe.py @@ -18,7 +18,7 @@ from typing import List -def cpe_parse(cpe_str: str) -> List[int]: +def cpe_parse(cpe_str: str) -> List[str]: escaped = False current = '' res = [] diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index 09535ff8e..bd93ad7da 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -184,10 +184,10 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra cpe = cpe_parse(xml_metadata.cpe) # distinguish between cpe format 2.2 or 2.3 - if cpe[1] == '2.3': - pkg.add_cpe(cpe[3], cpe[4]) + if cpe['1'] == '2.3': + pkg.add_cpe(cpe['3'], cpe['4']) else: - pkg.add_cpe(cpe[2], cpe[3]) + pkg.add_cpe(cpe['2'], cpe['3']) for ebuild in _iter_ebuilds(path, category, package): subpkg = pkg.clone(append_ident='/' + ebuild) From 294162707fbcd8703e4119b173eb695bdfde8d09 Mon Sep 17 00:00:00 2001 From: sicota Date: Tue, 13 Apr 2021 20:23:04 +0200 Subject: [PATCH 7/9] Fixing --- repology/parsers/parsers/gentoo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index bd93ad7da..09535ff8e 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -184,10 +184,10 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra cpe = cpe_parse(xml_metadata.cpe) # distinguish between cpe format 2.2 or 2.3 - if cpe['1'] == '2.3': - pkg.add_cpe(cpe['3'], cpe['4']) + if cpe[1] == '2.3': + pkg.add_cpe(cpe[3], cpe[4]) else: - pkg.add_cpe(cpe['2'], cpe['3']) + pkg.add_cpe(cpe[2], cpe[3]) for ebuild in _iter_ebuilds(path, category, package): subpkg = pkg.clone(append_ident='/' + ebuild) From da9134a7b8d45f0818ef532775eeaf0ac0e2ab4b Mon Sep 17 00:00:00 2001 From: sicota Date: Fri, 16 Apr 2021 23:49:03 +0200 Subject: [PATCH 8/9] Adding dataclass --- repology/parsers/cpe.py | 26 +++++++++++++++++++++++--- repology/parsers/parsers/gentoo.py | 11 +++-------- repology/test/test_cpe.py | 21 +++++++++++---------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py index bf3a1c0c0..4b00f6936 100644 --- a/repology/parsers/cpe.py +++ b/repology/parsers/cpe.py @@ -15,10 +15,24 @@ # You should have received a copy of the GNU General Public License # along with repology. If not, see . -from typing import List +from dataclasses import dataclass -def cpe_parse(cpe_str: str) -> List[str]: +@dataclass(unsafe_hash=True) +class CPE: + part: str = '*' + vendor: str = '*' + product: str = '*' + version: str = '*' + update: str = '*' + edition: str = '*' + lang: str = '*' + sw_edition: str = '*' + target_sw: str = '*' + target_hw: str = '*' + other: str = '*' + +def cpe_parse(cpe_str: str) -> CPE: escaped = False current = '' res = [] @@ -36,4 +50,10 @@ def cpe_parse(cpe_str: str) -> List[str]: current += char res.append(current) - return res + + if len(res) < 3: + return CPE() # input seems to be faulty, return default CPE + elif res[1] == '2.3': + return CPE(*res[3:]) # input seems to be CPE format 2.3 + else: + return CPE(*res[2:]) # input seems to be CPE format 2.2 diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index 09535ff8e..7912ead2c 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -17,14 +17,14 @@ import os import xml.etree.ElementTree + from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Set, Tuple - +from repology.parsers.cpe import cpe_parse from repology.logger import Logger from repology.package import PackageFlags from repology.packagemaker import NameType, PackageFactory, PackageMaker from repology.parsers import Parser -from repology.parsers.cpe import cpe_parse from repology.parsers.maintainers import extract_maintainers from repology.parsers.versions import VersionStripper from repology.transformer import PackageTransformer @@ -182,12 +182,7 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra if xml_metadata.cpe is not None: cpe = cpe_parse(xml_metadata.cpe) - - # distinguish between cpe format 2.2 or 2.3 - if cpe[1] == '2.3': - pkg.add_cpe(cpe[3], cpe[4]) - else: - pkg.add_cpe(cpe[2], cpe[3]) + pkg.add_cpe(cpe.part, cpe.vendor) for ebuild in _iter_ebuilds(path, category, package): subpkg = pkg.clone(append_ident='/' + ebuild) diff --git a/repology/test/test_cpe.py b/repology/test/test_cpe.py index dd553d04a..91026661a 100755 --- a/repology/test/test_cpe.py +++ b/repology/test/test_cpe.py @@ -21,23 +21,24 @@ import unittest -from repology.parsers.cpe import cpe_parse +from repology.parsers.cpe import cpe_parse, CPE class TestCpe(unittest.TestCase): def test_cpe_parse(self) -> None: - self.assertEqual(cpe_parse('foo:bar'), ['foo', 'bar']) - self.assertEqual(cpe_parse('foobar'), ['foobar']) - self.assertEqual(cpe_parse('cpe:2.3:a:andreas_mueller:cdrdao'), ['cpe', '2.3', 'a', 'andreas_mueller', 'cdrdao']) - self.assertEqual(cpe_parse('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar'), ['cpe', '/a', 'archive\\:\\:tar_project', 'archive\\:\\:tar']) - self.assertEqual(cpe_parse('foo\\:bar'), ['foo\\:bar']) - self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo\\\\', 'bar']) + self.assertIsInstance(cpe_parse('foo:bar'), CPE) + self.assertEqual(cpe_parse('foo:bar'), CPE(part='*', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('foobar'), CPE(part='*', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('cpe:2.3:a:andreas_mueller:cdrdao'), CPE(part='andreas_mueller', vendor='cdrdao', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar'), CPE(part='archive\\:\\:tar_project', vendor='archive\\:\\:tar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\:bar'), CPE(part='foo\\:bar', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\\\:bar'), CPE(part='foo\\\\', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) @unittest.expectedFailure def test_cpe_parse_failure(self) -> None: - self.assertEqual(cpe_parse('a:b'), ['ab']) - self.assertEqual(cpe_parse('foo\\:bar'), ['foo:bar']) - self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo', 'bar']) + self.assertEqual(cpe_parse('a:b'), CPE(part='a', vendor='b', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\:bar'), CPE(part='foo', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\\\:bar'), CPE(part='foo', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) if __name__ == '__main__': From 3873bfecd272faf77c71f3d1338f4110da28f9f6 Mon Sep 17 00:00:00 2001 From: sicota Date: Fri, 16 Apr 2021 23:49:03 +0200 Subject: [PATCH 9/9] Adding dataclass --- repology/parsers/cpe.py | 26 +++++++++++++++++++++++--- repology/parsers/parsers/gentoo.py | 11 +++-------- repology/test/test_cpe.py | 21 +++++++++++---------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/repology/parsers/cpe.py b/repology/parsers/cpe.py index bf3a1c0c0..4b00f6936 100644 --- a/repology/parsers/cpe.py +++ b/repology/parsers/cpe.py @@ -15,10 +15,24 @@ # You should have received a copy of the GNU General Public License # along with repology. If not, see . -from typing import List +from dataclasses import dataclass -def cpe_parse(cpe_str: str) -> List[str]: +@dataclass(unsafe_hash=True) +class CPE: + part: str = '*' + vendor: str = '*' + product: str = '*' + version: str = '*' + update: str = '*' + edition: str = '*' + lang: str = '*' + sw_edition: str = '*' + target_sw: str = '*' + target_hw: str = '*' + other: str = '*' + +def cpe_parse(cpe_str: str) -> CPE: escaped = False current = '' res = [] @@ -36,4 +50,10 @@ def cpe_parse(cpe_str: str) -> List[str]: current += char res.append(current) - return res + + if len(res) < 3: + return CPE() # input seems to be faulty, return default CPE + elif res[1] == '2.3': + return CPE(*res[3:]) # input seems to be CPE format 2.3 + else: + return CPE(*res[2:]) # input seems to be CPE format 2.2 diff --git a/repology/parsers/parsers/gentoo.py b/repology/parsers/parsers/gentoo.py index 09535ff8e..7912ead2c 100644 --- a/repology/parsers/parsers/gentoo.py +++ b/repology/parsers/parsers/gentoo.py @@ -17,14 +17,14 @@ import os import xml.etree.ElementTree + from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Set, Tuple - +from repology.parsers.cpe import cpe_parse from repology.logger import Logger from repology.package import PackageFlags from repology.packagemaker import NameType, PackageFactory, PackageMaker from repology.parsers import Parser -from repology.parsers.cpe import cpe_parse from repology.parsers.maintainers import extract_maintainers from repology.parsers.versions import VersionStripper from repology.transformer import PackageTransformer @@ -182,12 +182,7 @@ def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTra if xml_metadata.cpe is not None: cpe = cpe_parse(xml_metadata.cpe) - - # distinguish between cpe format 2.2 or 2.3 - if cpe[1] == '2.3': - pkg.add_cpe(cpe[3], cpe[4]) - else: - pkg.add_cpe(cpe[2], cpe[3]) + pkg.add_cpe(cpe.part, cpe.vendor) for ebuild in _iter_ebuilds(path, category, package): subpkg = pkg.clone(append_ident='/' + ebuild) diff --git a/repology/test/test_cpe.py b/repology/test/test_cpe.py index dd553d04a..91026661a 100755 --- a/repology/test/test_cpe.py +++ b/repology/test/test_cpe.py @@ -21,23 +21,24 @@ import unittest -from repology.parsers.cpe import cpe_parse +from repology.parsers.cpe import cpe_parse, CPE class TestCpe(unittest.TestCase): def test_cpe_parse(self) -> None: - self.assertEqual(cpe_parse('foo:bar'), ['foo', 'bar']) - self.assertEqual(cpe_parse('foobar'), ['foobar']) - self.assertEqual(cpe_parse('cpe:2.3:a:andreas_mueller:cdrdao'), ['cpe', '2.3', 'a', 'andreas_mueller', 'cdrdao']) - self.assertEqual(cpe_parse('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar'), ['cpe', '/a', 'archive\\:\\:tar_project', 'archive\\:\\:tar']) - self.assertEqual(cpe_parse('foo\\:bar'), ['foo\\:bar']) - self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo\\\\', 'bar']) + self.assertIsInstance(cpe_parse('foo:bar'), CPE) + self.assertEqual(cpe_parse('foo:bar'), CPE(part='*', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('foobar'), CPE(part='*', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('cpe:2.3:a:andreas_mueller:cdrdao'), CPE(part='andreas_mueller', vendor='cdrdao', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar'), CPE(part='archive\\:\\:tar_project', vendor='archive\\:\\:tar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\:bar'), CPE(part='foo\\:bar', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\\\:bar'), CPE(part='foo\\\\', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) @unittest.expectedFailure def test_cpe_parse_failure(self) -> None: - self.assertEqual(cpe_parse('a:b'), ['ab']) - self.assertEqual(cpe_parse('foo\\:bar'), ['foo:bar']) - self.assertEqual(cpe_parse('foo\\\\:bar'), ['foo', 'bar']) + self.assertEqual(cpe_parse('a:b'), CPE(part='a', vendor='b', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\:bar'), CPE(part='foo', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) + self.assertEqual(cpe_parse('1:2:foo\\\\:bar'), CPE(part='foo', vendor='bar', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')) if __name__ == '__main__':