Skip to content

Commit

Permalink
generation/: Ignore files which are not unicode
Browse files Browse the repository at this point in the history
When attempting to determine the language based on the hashbang,
files which cannot be decoded as Unicode should be ignored.

Also merge the hashbang detection code and add coverage
of split_by_language.

Fixes coala#293
  • Loading branch information
jayvdb committed Oct 13, 2018
1 parent 7bd5689 commit 5c26e67
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 24 deletions.
1 change: 0 additions & 1 deletion .nocover.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,3 @@ nocover_regexes:
- def is_glob_exp
- def get_gitignore_glob
- def parse_gitignore_line
- def get_language_from_hashbang
27 changes: 16 additions & 11 deletions coala_quickstart/generation/Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from coala_utils.string_processing.StringConverter import StringConverter
from coala_utils.Extensions import exts
from coala_quickstart.generation.Utilities import get_language_from_hashbang
from coala_quickstart.generation.Utilities import (
get_hashbang,
get_language_from_hashbang,
)
from coala_quickstart.Constants import (
ASK_TO_SELECT_LANG,
HASHBANG_REGEX,
)


Expand Down Expand Up @@ -56,15 +58,18 @@ def language_percentage(file_paths):
for lang in exts[ext]:
results[lang] += delta

elif os.path.exists(file_path):
with open(file_path, 'r') as data:
hashbang = data.readline()
if re.match(HASHBANG_REGEX, hashbang):
language = get_language_from_hashbang(hashbang).lower()
for ext in exts:
for lang in exts[ext]:
if language == lang.lower():
results[lang.lower()] += delta
continue

hashbang = get_hashbang(file_path)

if not hashbang:
continue

language = get_language_from_hashbang(hashbang).lower()
for ext in exts:
for lang in exts[ext]:
if language == lang.lower():
results[lang.lower()] += delta

return results

Expand Down
48 changes: 37 additions & 11 deletions coala_quickstart/generation/Utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,21 @@ def split_by_language(project_files):
for lang in exts[ext]:
lang_files[lang.lower()].add(file)
lang_files['all'].add(file)
else: # pragma: nocover
with open(file, 'r') as data:
hashbang = data.readline()
if(re.match(HASHBANG_REGEX, hashbang)):
language = get_language_from_hashbang(hashbang).lower()
for ext in exts:
for lang in exts[ext]:
if language == lang.lower():
lang_files[lang.lower()].add(file)
lang_files['all'].add(file)

continue

hashbang = get_hashbang(file)

if not hashbang:
continue

language = get_language_from_hashbang(hashbang).lower()
for ext in exts:
for lang in exts[ext]:
if language == lang.lower():
lang_files[lang.lower()].add(file)
lang_files['all'].add(file)

return lang_files


Expand Down Expand Up @@ -179,8 +184,29 @@ def search_for_orig(decorated, orig_name):
return found


def get_hashbang(file_path):
    """
    Read the hashbang line found at the very beginning of a file.

    :param file_path: Path of the file to inspect.
    :return:          The first line of the file, stripped of surrounding
                      whitespace, if it matches ``HASHBANG_REGEX``.
                      ``None`` if the path does not exist, cannot be opened
                      or read as text (e.g. it is a directory, is not
                      readable or holds undecodable bytes), or if its first
                      line is blank or does not match ``HASHBANG_REGEX``.
    """
    if not os.path.exists(file_path):
        return None

    try:
        with open(file_path, 'r') as data:
            first_line = data.readline()
    except (OSError, UnicodeDecodeError):
        # OSError: the path exists but is a directory, is unreadable, or
        # vanished after the existence check above.
        # UnicodeDecodeError: the content is not text, so it cannot carry
        # a hashbang we could interpret.
        return None

    hashbang = first_line.strip()
    if not hashbang:
        return None

    if not re.match(HASHBANG_REGEX, hashbang):
        return None

    return hashbang


def get_language_from_hashbang(hashbang):
if(re.match('(^#!(.*))', hashbang)):
assert hashbang
if hashbang: # pragma: no branch
hashbang_contents = hashbang.split(' ')
try:
# For eg: #!bin/bash python3
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,5 @@ exclude_lines =
def is_glob_exp
def get_gitignore_glob
def parse_gitignore_line
def get_language_from_hashbang

[coverage:force_end_of_section]
82 changes: 82 additions & 0 deletions tests/generation/UtilitiesTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
import types
import unittest

from tempfile import NamedTemporaryFile

from tests.test_bears.AllKindsOfSettingsDependentBear import (
AllKindsOfSettingsDependentBear)
from coala_quickstart.generation.Utilities import (
contained_in,
get_hashbang,
get_default_args, get_all_args,
search_for_orig, concatenate, peek,
split_by_language,
get_language_from_hashbang)
from coalib.results.SourcePosition import SourcePosition
from coalib.results.SourceRange import SourceRange
Expand Down Expand Up @@ -73,11 +77,89 @@ def test_get_all_args(self):
'no_chars': 79,
'chars': False, 'dependency_results': {}})


class TestHashBang(unittest.TestCase):
    """
    Tests for ``get_hashbang``, ``get_language_from_hashbang`` and the
    hashbang handling of ``split_by_language``.
    """

    def _make_temp_file(self, content, suffix=''):
        """
        Create a closed temporary file which is removed after the test.

        :param content: ``str`` or ``bytes`` to write; the file is opened in
                        text or binary mode accordingly.
        :param suffix:  File name suffix, e.g. ``'.py'``.
        :return:        The path of the created file.
        """
        # Local import so this helper does not depend on the module's
        # import block.
        import os

        mode = 'w+b' if isinstance(content, bytes) else 'w+t'
        with NamedTemporaryFile(mode=mode, suffix=suffix,
                                delete=False) as temp_file:
            # delete=False keeps the file on disk after it is closed, so it
            # has to be removed explicitly once the test is done.
            self.addCleanup(os.remove, temp_file.name)
            temp_file.write(content)
        return temp_file.name

    def test_missing_file(self):
        self.assertIsNone(get_hashbang('does_not_exist'))

    def test_with_bash(self):
        path = self._make_temp_file('#!bin/bash\n')
        self.assertEqual(get_hashbang(path), '#!bin/bash')

    def test_no_eol(self):
        path = self._make_temp_file('#!bin/bash')
        self.assertEqual(get_hashbang(path), '#!bin/bash')

    def test_with_slash(self):
        path = self._make_temp_file('#!/bin/bash\n')
        self.assertEqual(get_hashbang(path), '#!/bin/bash')

    def test_with_space(self):
        # Trailing whitespace must be stripped from the returned hashbang.
        path = self._make_temp_file('#!/bin/bash \n')
        self.assertEqual(get_hashbang(path), '#!/bin/bash')

    def test_env(self):
        path = self._make_temp_file('#!/bin/env bash\n')
        self.assertEqual(get_hashbang(path), '#!/bin/env bash')

    def test_non_unicode_file(self):
        # The leading \200 byte (0x80) is not a valid start of a UTF-8
        # sequence.
        path = self._make_temp_file(b'\2000x80')
        self.assertIsNone(get_hashbang(path))

    def test_empty_file(self):
        path = self._make_temp_file('\n')
        self.assertIsNone(get_hashbang(path))

    def test_no_bang(self):
        path = self._make_temp_file('#bin/bash')
        self.assertIsNone(get_hashbang(path))

    def test_no_hash(self):
        path = self._make_temp_file('!bin/bash')
        self.assertIsNone(get_hashbang(path))

    def test_get_language_from_hashbang(self):
        self.assertEqual(get_language_from_hashbang('#!/usr/bin/env python'),
                         'python')
        self.assertEqual(get_language_from_hashbang('#!bin/bash'),
                         'bash')
        self.assertEqual(get_language_from_hashbang('#!/bin/bash'),
                         'bash')

    def test_split_by_language(self):
        python_file = self._make_temp_file(b'', suffix='.py')
        plain_file = self._make_temp_file(b'', suffix='.txt')
        script_file = self._make_temp_file(b'#!bin/python', suffix='.txt')

        langs = split_by_language([python_file, plain_file, script_file])

        # Iterating a dict yields only its keys, so the detected languages
        # and the files grouped under each of them are checked separately.
        self.assertCountEqual(langs, ['all', 'python'])
        self.assertCountEqual(langs['all'], [python_file, script_file])
        self.assertCountEqual(langs['python'], [python_file, script_file])


class TestDataStructuresOperationsFunctions(unittest.TestCase):
Expand Down

0 comments on commit 5c26e67

Please sign in to comment.