Skip to content

Commit

Permalink
Search for correct columns, if not found is because v>=1.3 so look ag… (
Browse files Browse the repository at this point in the history
#41)

* Search for the correct columns; if they are not found, it is because the AreTomo version is >=1.3, so look again in the aln file

* generalise parsing logic to also work for AreTomo v1.3.0 files (#42)

* generalise parsing logic to also work for AreTomo v1.3.0 files, add example file and test

* add EOF

Co-authored-by: alisterburt <[email protected]>
  • Loading branch information
EuanPyle and alisterburt authored Nov 1, 2022
1 parent e7cea7e commit 5455703
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 10 deletions.
10 changes: 9 additions & 1 deletion lil_aretomo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,15 @@ def check_aretomo_on_path():

def read_aln(filename: os.PathLike) -> pd.DataFrame:
"""Read an AreTomo .aln file"""
df = pd.read_csv(filename, header='infer', skiprows=2, delimiter=r'\s+')
with open(filename, 'r') as f:
lines_starting_with_hash = 0
for line in f.readlines():
if line.startswith('#'):
lines_starting_with_hash += 1
else:
break
lines_to_skip = lines_starting_with_hash - 1
df = pd.read_csv(filename, header='infer', skiprows=lines_to_skip, delimiter=r'\s+')

# '#' character in header line is parsed as a column name
# drop empty column on the far right and shift column names to the left by 1
Expand Down
9 changes: 7 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@ def test_data_dir() -> Path:


@pytest.fixture
def aln_file(test_data_dir) -> Path:
return test_data_dir / 'example.aln'
def aln_file_v1_2_0(test_data_dir) -> Path:
    """Return the path of the v1.2.0 example ``.aln`` file.

    The result is ``test_data_dir`` joined with the example file name.
    """
    example_name = 'example_v1_2_0.aln'
    return test_data_dir / example_name


@pytest.fixture
def aln_file_v1_3_0(test_data_dir) -> Path:
    """Pytest fixture giving the path of the v1.3.0 example ``.aln`` file.

    The result is ``test_data_dir`` joined with the example file name.
    """
    example_name = 'example_v1_3_0.aln'
    return test_data_dir / example_name
File renamed without changes.
45 changes: 45 additions & 0 deletions tests/test_data/example_v1_3_0.aln
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# AreTomo Alignment / Priims bprmMn
# RawSize = 5760 4092 41
# NumPatches = 0
# SEC ROT GMAG TX TY SMEAN SFIT SCALE BASE TILT
0 -93.6692 1.00000 157.142 720.866 1.00 1.00 1.00 0.00 -51.98
1 -93.6692 1.00000 130.526 686.004 1.00 1.00 1.00 0.00 -48.99
2 -93.6692 1.00000 135.995 620.995 1.00 1.00 1.00 0.00 -45.98
3 -93.6692 1.00000 108.331 572.701 1.00 1.00 1.00 0.00 -42.99
4 -93.6692 1.00000 108.941 512.625 1.00 1.00 1.00 0.00 -39.99
5 -93.6692 1.00000 84.490 451.466 1.00 1.00 1.00 0.00 -36.99
6 -93.6692 1.00000 81.471 403.957 1.00 1.00 1.00 0.00 -33.99
7 -93.6692 1.00000 61.171 350.820 1.00 1.00 1.00 0.00 -30.99
8 -93.6692 1.00000 54.104 300.074 1.00 1.00 1.00 0.00 -27.99
9 -93.6692 1.00000 43.330 273.491 1.00 1.00 1.00 0.00 -24.99
10 -93.6692 1.00000 39.284 247.165 1.00 1.00 1.00 0.00 -21.99
11 -93.6692 1.00000 33.427 213.190 1.00 1.00 1.00 0.00 -18.99
12 -93.6692 1.00000 27.254 176.363 1.00 1.00 1.00 0.00 -15.99
13 -93.6692 1.00000 25.066 140.100 1.00 1.00 1.00 0.00 -12.99
14 -93.6692 1.00000 23.384 98.713 1.00 1.00 1.00 0.00 -9.99
15 -93.6692 1.00000 15.737 57.712 1.00 1.00 1.00 0.00 -6.99
16 -93.6692 1.00000 11.379 30.819 1.00 1.00 1.00 0.00 -4.00
17 -93.6692 1.00000 0.000 0.000 1.00 1.00 1.00 0.00 -0.99
18 -93.6692 1.00000 -4.137 -24.714 1.00 1.00 1.00 0.00 2.01
19 -93.6692 1.00000 2.242 -53.349 1.00 1.00 1.00 0.00 5.01
20 -93.6692 1.00000 -5.422 -82.151 1.00 1.00 1.00 0.00 8.00
21 -93.6692 1.00000 1.357 -112.474 1.00 1.00 1.00 0.00 11.01
22 -93.6692 1.00000 -3.247 -147.366 1.00 1.00 1.00 0.00 14.00
23 -93.6692 1.00000 1.638 -180.845 1.00 1.00 1.00 0.00 17.00
24 -93.6692 1.00000 -2.903 -237.837 1.00 1.00 1.00 0.00 20.00
25 -93.6692 1.00000 0.473 -261.575 1.00 1.00 1.00 0.00 23.00
26 -93.6692 1.00000 1.526 -317.741 1.00 1.00 1.00 0.00 26.00
27 -93.6692 1.00000 0.551 -341.948 1.00 1.00 1.00 0.00 29.00
28 -93.6692 1.00000 4.927 -396.563 1.00 1.00 1.00 0.00 32.00
29 -93.6692 1.00000 1.483 -420.524 1.00 1.00 1.00 0.00 35.00
30 -93.6692 1.00000 10.616 -468.995 1.00 1.00 1.00 0.00 38.00
31 -93.6692 1.00000 -2.572 -492.375 1.00 1.00 1.00 0.00 41.00
32 -93.6692 1.00000 14.611 -566.703 1.00 1.00 1.00 0.00 44.00
33 -93.6692 1.00000 -0.127 -586.438 1.00 1.00 1.00 0.00 47.00
34 -93.6692 1.00000 21.029 -620.977 1.00 1.00 1.00 0.00 50.00
35 -93.6692 1.00000 21.205 -708.150 1.00 1.00 1.00 0.00 53.00
36 -93.6692 1.00000 28.710 -735.140 1.00 1.00 1.00 0.00 56.00
37 -93.6692 1.00000 34.936 -741.670 1.00 1.00 1.00 0.00 59.00
38 -93.6692 1.00000 41.539 -748.414 1.00 1.00 1.00 0.00 62.00
39 -93.6692 1.00000 42.722 -771.160 1.00 1.00 1.00 0.00 65.00
40 -93.6692 1.00000 44.463 -809.333 1.00 1.00 1.00 0.00 68.00
15 changes: 8 additions & 7 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from lil_aretomo.utils import read_aln


def test_read_aln(aln_file):
def test_read_aln(aln_file_v1_2_0, aln_file_v1_3_0):
"""Check that aln files are correctly parsed."""
df = read_aln(aln_file)
assert isinstance(df, pd.DataFrame)
assert df.shape == (41, 10)
expected_columns = [
'SEC', 'ROT', 'GMAG', 'TX', 'TY', 'SMEAN', 'SFIT', 'SCALE', 'BASE', 'TILT'
]
for aln_file in (aln_file_v1_2_0, aln_file_v1_3_0):
df = read_aln(aln_file)
assert isinstance(df, pd.DataFrame)
assert df.shape == (41, 10)
expected_columns = [
'SEC', 'ROT', 'GMAG', 'TX', 'TY', 'SMEAN', 'SFIT', 'SCALE', 'BASE', 'TILT'
]
assert all(col in df.columns for col in expected_columns)

0 comments on commit 5455703

Please sign in to comment.