diff --git a/.nocover.yaml b/.nocover.yaml index 8147e89..bd157d3 100644 --- a/.nocover.yaml +++ b/.nocover.yaml @@ -45,4 +45,3 @@ nocover_regexes: - def is_glob_exp - def get_gitignore_glob - def parse_gitignore_line - - def get_language_from_hashbang diff --git a/coala_quickstart/generation/Project.py b/coala_quickstart/generation/Project.py index 106f9d9..2423055 100644 --- a/coala_quickstart/generation/Project.py +++ b/coala_quickstart/generation/Project.py @@ -5,10 +5,12 @@ from coala_utils.string_processing.StringConverter import StringConverter from coala_utils.Extensions import exts -from coala_quickstart.generation.Utilities import get_language_from_hashbang +from coala_quickstart.generation.Utilities import ( + get_hashbang, + get_language_from_hashbang, +) from coala_quickstart.Constants import ( ASK_TO_SELECT_LANG, - HASHBANG_REGEX, ) @@ -56,15 +58,18 @@ def language_percentage(file_paths): for lang in exts[ext]: results[lang] += delta - elif os.path.exists(file_path): - with open(file_path, 'r') as data: - hashbang = data.readline() - if re.match(HASHBANG_REGEX, hashbang): - language = get_language_from_hashbang(hashbang).lower() - for ext in exts: - for lang in exts[ext]: - if language == lang.lower(): - results[lang.lower()] += delta + continue + + hashbang = get_hashbang(file_path) + + if not hashbang: + continue + + language = get_language_from_hashbang(hashbang).lower() + for ext in exts: + for lang in exts[ext]: + if language == lang.lower(): + results[lang.lower()] += delta return results diff --git a/coala_quickstart/generation/Utilities.py b/coala_quickstart/generation/Utilities.py index 6031c23..7c40e92 100644 --- a/coala_quickstart/generation/Utilities.py +++ b/coala_quickstart/generation/Utilities.py @@ -99,16 +99,21 @@ def split_by_language(project_files): for lang in exts[ext]: lang_files[lang.lower()].add(file) lang_files['all'].add(file) - else: # pragma: nocover - with open(file, 'r') as data: - hashbang = data.readline() - if(re.match(HASHBANG_REGEX, hashbang)): - language = get_language_from_hashbang(hashbang).lower() - for ext in exts: - for lang in exts[ext]: - if language == lang.lower(): - lang_files[lang.lower()].add(file) - lang_files['all'].add(file) + + continue + + hashbang = get_hashbang(file) + + if not hashbang: + continue + + language = get_language_from_hashbang(hashbang).lower() + for ext in exts: + for lang in exts[ext]: + if language == lang.lower(): + lang_files[lang.lower()].add(file) + lang_files['all'].add(file) + return lang_files @@ -179,8 +184,29 @@ def search_for_orig(decorated, orig_name): return found +def get_hashbang(file_path): + if not os.path.exists(file_path): + return + + try: + with open(file_path, 'r') as data: + hashbang = data.readline() + except UnicodeDecodeError: # pragma nt: no cover + return + + hashbang = hashbang.strip() + if not hashbang: + return + + if not re.match(HASHBANG_REGEX, hashbang): + return + + return hashbang + + def get_language_from_hashbang(hashbang): - if(re.match('(^#!(.*))', hashbang)): + assert hashbang + if hashbang: # pragma: no branch hashbang_contents = hashbang.split(' ') try: # For eg: #!bin/bash python3 diff --git a/setup.cfg b/setup.cfg index 86e3644..cdc8013 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,6 +90,5 @@ exclude_lines = def is_glob_exp def get_gitignore_glob def parse_gitignore_line - def get_language_from_hashbang [coverage:force_end_of_section] diff --git a/tests/generation/UtilitiesTest.py b/tests/generation/UtilitiesTest.py index 35fa4c6..28d129c 100644 --- a/tests/generation/UtilitiesTest.py +++ b/tests/generation/UtilitiesTest.py @@ -3,12 +3,16 @@ import types import unittest +from tempfile import NamedTemporaryFile + from tests.test_bears.AllKindsOfSettingsDependentBear import ( AllKindsOfSettingsDependentBear) from coala_quickstart.generation.Utilities import ( contained_in, + get_hashbang, get_default_args, get_all_args, search_for_orig, concatenate, peek, + split_by_language, get_language_from_hashbang) from coalib.results.SourcePosition import SourcePosition from coalib.results.SourceRange import SourceRange @@ -73,11 +77,89 @@ def test_get_all_args(self): 'no_chars': 79, 'chars': False, 'dependency_results': {}}) + +class TestHashBang(unittest.TestCase): + + def test_missing_file(self): + self.assertIsNone(get_hashbang('does_not_exist')) + + def test_with_bash(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#!bin/bash\n') + temp_file.close() + self.assertEqual(get_hashbang(temp_file.name), '#!bin/bash') + + def test_no_eol(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#!bin/bash') + temp_file.close() + self.assertEqual(get_hashbang(temp_file.name), '#!bin/bash') + + def test_with_slash(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#!/bin/bash\n') + temp_file.close() + self.assertEqual(get_hashbang(temp_file.name), '#!/bin/bash') + + def test_with_space(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#!/bin/bash \n') + temp_file.close() + self.assertEqual(get_hashbang(temp_file.name), '#!/bin/bash') + + def test_env(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#!/bin/env bash\n') + temp_file.close() + self.assertEqual(get_hashbang(temp_file.name), '#!/bin/env bash') + + def test_non_unicode_file(self): + with NamedTemporaryFile(mode='w+b', delete=False) as temp_file: + temp_file.write(b'\2000x80') + temp_file.close() + self.assertIsNone(get_hashbang(temp_file.name)) + + def test_empty_file(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('\n') + temp_file.close() + self.assertIsNone(get_hashbang(temp_file.name)) + + def test_no_bang(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('#bin/bash') + temp_file.close() + self.assertIsNone(get_hashbang(temp_file.name)) + + def test_no_hash(self): + with NamedTemporaryFile(mode='w+t', delete=False) as temp_file: + temp_file.write('!bin/bash') + temp_file.close() + self.assertIsNone(get_hashbang(temp_file.name)) + def test_get_language_from_hashbang(self): self.assertEqual(get_language_from_hashbang('#!/usr/bin/env python'), 'python') self.assertEqual(get_language_from_hashbang('#!bin/bash'), 'bash') + self.assertEqual(get_language_from_hashbang('#!/bin/bash'), + 'bash') + + def test_split_by_language(self): + with NamedTemporaryFile(delete=False, suffix='.py') as temp_file1, \ + NamedTemporaryFile(delete=False, suffix='.txt') as temp_file2, \ + NamedTemporaryFile(delete=False, suffix='.txt') as temp_file3: + temp_file3.write(b'#!bin/python') + temp_file3.close() + langs = split_by_language( + [temp_file1.name, temp_file2.name, temp_file3.name]) + self.assertCountEqual( + langs, + { + 'all': [temp_file1.name, temp_file3.name], + 'python': [temp_file1.name, temp_file3.name], + } + ) class TestDataStructuresOperationsFunctions(unittest.TestCase):