Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing cpe data split for cpe fields containing colon #1142

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
59 changes: 59 additions & 0 deletions repology/parsers/cpe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (C) 2019 Dmitry Marakasov <[email protected]>
#
# This file is part of repology
#
# repology is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# repology is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with repology. If not, see <http://www.gnu.org/licenses/>.

from dataclasses import dataclass


# unsafe_hash=True makes the dataclass generate __hash__ from the fields
# even though instances are not frozen.
@dataclass(unsafe_hash=True)
class CPE:
    """Holder for the colon-separated components of a CPE string.

    Every field is a string and defaults to '*'. The declaration order
    matters: cpe_parse() passes the parsed components positionally, so
    they are assigned to the fields in the order listed here.
    """

    part: str = '*'
    vendor: str = '*'
    product: str = '*'
    version: str = '*'
    update: str = '*'
    edition: str = '*'
    lang: str = '*'
    sw_edition: str = '*'
    target_sw: str = '*'
    target_hw: str = '*'
    other: str = '*'

def cpe_parse(cpe_str: str) -> CPE:
    """Split a CPE string on unescaped colons and build a CPE from it.

    A backslash escapes the character that follows it: the pair is
    copied into the current component verbatim (backslash included), so
    an escaped colon does not act as a separator. A lone backslash at
    the very end of the input is dropped.

    Leading components are skipped and the remaining ones fill the CPE
    fields positionally, starting with ``part``:

    * fewer than three components: the input is treated as faulty and
      a default CPE (every field '*') is returned;
    * second component equal to '2.3': the first three components are
      skipped;
    * anything else: the first two components are skipped.

    Components beyond the number of CPE fields are ignored, so overlong
    input no longer makes the CPE constructor raise TypeError.

    NOTE(review): for 'cpe:2.3:a:vendor:product' this stores the vendor
    in ``part`` and the product in ``vendor``. Callers appear to rely on
    that positional layout, so it is kept as is - confirm before
    changing the offsets.

    Args:
        cpe_str: the raw CPE string.

    Returns:
        A CPE instance; fields with no matching component keep '*'.
    """
    # Function-scope import keeps this block independent of the
    # module-level import list.
    from dataclasses import fields

    components = []
    current = []
    escaped = False

    for char in cpe_str:
        if escaped:
            # Keep the escape sequence intact for the consumer.
            current.append('\\' + char)
            escaped = False
        elif char == '\\':
            escaped = True
        elif char == ':':
            components.append(''.join(current))
            current = []
        else:
            current.append(char)

    components.append(''.join(current))

    if len(components) < 3:
        return CPE()  # input seems to be faulty, return default CPE

    # '2.3' in second position means three prefix components are
    # skipped; any other input has two.
    skip = 3 if components[1] == '2.3' else 2

    # Never hand the constructor more positional values than it has
    # fields.
    max_fields = len(fields(CPE))
    return CPE(*components[skip:skip + max_fields])
8 changes: 5 additions & 3 deletions repology/parsers/parsers/gentoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

import os
import xml.etree.ElementTree

from dataclasses import dataclass, field
from typing import Dict, Iterable, List, Optional, Set, Tuple
from repology.parsers.cpe import cpe_parse
from typing import Iterable

from repology.logger import Logger
from repology.package import PackageFlags
from repology.packagemaker import NameType, PackageFactory, PackageMaker
Expand Down Expand Up @@ -179,8 +181,8 @@ def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMake
pkg.add_maintainers(xml_metadata.maintainers)

if xml_metadata.cpe is not None:
cpe = xml_metadata.cpe.split(':')
pkg.add_cpe(cpe[2], cpe[3])
cpe = cpe_parse(xml_metadata.cpe)
pkg.add_cpe(cpe.part, cpe.vendor)

for ebuild in _iter_ebuilds(path, category, package):
subpkg = pkg.clone(append_ident='/' + ebuild)
Expand Down
45 changes: 45 additions & 0 deletions repology/test/test_cpe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
#
# Copyright (C) 2021 Dmitry Marakasov <[email protected]>
#
# This file is part of repology
#
# repology is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# repology is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with repology. If not, see <http://www.gnu.org/licenses/>.

# mypy: no-disallow-untyped-calls

import unittest

from repology.parsers.cpe import cpe_parse, CPE


class TestCpe(unittest.TestCase):
    """Tests for cpe_parse()."""

    def test_cpe_parse(self) -> None:
        """Check the CPE produced for faulty, plain and escaped input."""
        self.assertIsInstance(cpe_parse('foo:bar'), CPE)

        # Spelled out in full once so that every field default is pinned;
        # the other expectations rely on those defaults.
        all_defaults = CPE(part='*', vendor='*', product='*', version='*', update='*', edition='*', lang='*', sw_edition='*', target_sw='*', target_hw='*', other='*')

        cases = [
            ('foo:bar', all_defaults),
            ('foobar', all_defaults),
            ('cpe:2.3:a:andreas_mueller:cdrdao', CPE(part='andreas_mueller', vendor='cdrdao')),
            ('cpe:/a:archive\\:\\:tar_project:archive\\:\\:tar', CPE(part='archive\\:\\:tar_project', vendor='archive\\:\\:tar')),
            ('1:2:foo\\:bar', CPE(part='foo\\:bar')),
            ('1:2:foo\\\\:bar', CPE(part='foo\\\\', vendor='bar')),
        ]

        for cpe_str, expected in cases:
            with self.subTest(cpe_str=cpe_str):
                self.assertEqual(cpe_parse(cpe_str), expected)

    def test_cpe_parse_failure(self) -> None:
        """Check results that cpe_parse() must not produce.

        Each case is an explicit assertNotEqual rather than an assertEqual
        under @unittest.expectedFailure: an expected-failure test stops at
        its first failing assertion, so later cases would never run.
        """
        cases = [
            # too few components: nothing may be taken from the input
            ('a:b', CPE(part='a', vendor='b')),
            # an escaped colon must not split the component
            ('1:2:foo\\:bar', CPE(part='foo', vendor='bar')),
            # an escaped backslash must stay in the component
            ('1:2:foo\\\\:bar', CPE(part='foo', vendor='bar')),
        ]

        for cpe_str, unexpected in cases:
            with self.subTest(cpe_str=cpe_str):
                self.assertNotEqual(cpe_parse(cpe_str), unexpected)


# Run the tests of this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()