Skip to content

Commit

Permalink
Gotoh Local
Browse files Browse the repository at this point in the history
  • Loading branch information
dawnandrew100 committed Nov 15, 2024
1 parent e243e17 commit 7bc0969
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 13 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This project contains several sequence alignment algorithms that can also produc
| ------------------ | --------- | -------- | ------------------------------ |
|Needleman-Wunsch | [x] | [x] | [x] |
|Gotoh (Global) | [x] | [x] | [x] |
|Gotoh (Local) | [x] | [x] | [x] |
|Smith-Waterman | [x] | [x] | [x] |
|Waterman-Smith-Beyer | [x] | [x] | [x] |
|Wagner-Fischer | [x] | [x] | [x] |
Expand All @@ -25,7 +26,9 @@ This project contains several sequence alignment algorithms that can also produc
## Algorithms Explained
[Needleman-Wunsch](https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm)

[Gotoh -Global](https://helios2.mi.parisdescartes.fr/~lomn/Cours/BI/Material/gap-penalty-gotoh.pdf)
[Gotoh (Global)](https://helios2.mi.parisdescartes.fr/~lomn/Cours/BI/Material/gap-penalty-gotoh.pdf)

[Gotoh (Local)](http://rna.informatik.uni-freiburg.de/Teaching/index.jsp?toolName=Gotoh%20(Local))

[Smith-Waterman ](https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm)

Expand Down Expand Up @@ -97,7 +100,6 @@ print(needleman_wunsch.matrix("AFTG","ACTG"))

# Work In Progress

Jaro and Jaro-Winkler algorithms.
Importing and parsing FASTA, FASTQ, and PDB files.

# Caveats
Expand Down
1 change: 1 addition & 0 deletions limestone/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from limestone.algorithms.editdistance import longest_common_subsequence
from limestone.algorithms.editdistance import shortest_common_supersequence
from limestone.algorithms.editdistance import gotoh
from limestone.algorithms.editdistance import gotoh_local
77 changes: 69 additions & 8 deletions limestone/algorithms/editdistance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@
raise ImportError("Please pip install all dependencies from requirements.txt!")

def main():
qqs = "GCATGCCAT"
sss = "CATGCATCGAC"

print(gotoh.align(qqs, sss))
print(gotoh.distance(qqs, sss))
print(gotoh.normalized_distance(qqs, sss))
print(gotoh.similarity(qqs, sss))
qqs = "WHORUNSTHEWORLDGIRLS"
sss = "DOGSSTHEWORLDEXUBERENT"

print(gotoh_local(qqs, sss))
print(gotoh_local.align(qqs, sss))

class Wagner_Fischer(__GLOBALBASE): #Levenshtein Distance
def __init__(self)->None:
Expand Down Expand Up @@ -312,7 +309,6 @@ def __call__(self, querySequence: str, subjectSequence: str)->tuple[NDArray[floa
qs.extend([x.upper() for x in querySequence])
ss.extend([x.upper() for x in subjectSequence])

#LETS GIVE POSITIVES A TRY FOR THE TESTS
#matrix initialisation
self.D = numpy.full((len(qs),len(ss)), numpy.inf)
self.P = numpy.full((len(qs), len(ss)), numpy.inf)
Expand Down Expand Up @@ -379,6 +375,70 @@ def align(self, querySequence: str, subjectSequence: str)->str:

return f"{queryAlign}\n{subjectAlign}"

class Gotoh_Local(__LOCALBASE):
def __init__(self, match_score:int = 1, mismatch_penalty:int = 1, new_gap_penalty:int = 2, continue_gap_penalty: int = 1)->None:
self.match_score = match_score
self.mismatch_penalty = mismatch_penalty
self.new_gap_penalty = new_gap_penalty
self.continue_gap_penalty = continue_gap_penalty

def __call__(self, querySequence: str, subjectSequence: str)->tuple[NDArray[float64],NDArray[float64],NDArray[float64]]:
qs,ss = [""], [""]
qs.extend([x.upper() for x in querySequence])
ss.extend([x.upper() for x in subjectSequence])

#matrix initialisation
self.D = numpy.full((len(qs),len(ss)), -numpy.inf)
self.P = numpy.full((len(qs), len(ss)), -numpy.inf)
self.P[:,0] = 0
self.Q = numpy.full((len(qs), len(ss)), -numpy.inf)
self.Q[0,:] = 0
#initialisation of starter values for first column and first row
self.D[:,0] = 0
self.D[0,:] = 0

for i in range(1, len(qs)):
for j in range(1, len(ss)):
match = self.D[i - 1, j - 1] + (self.match_score if qs[i] == ss[j] else -self.mismatch_penalty)
self.P[i, j] = max(self.D[i - 1, j] - self.new_gap_penalty - self.continue_gap_penalty, self.P[i - 1, j] - self.continue_gap_penalty)
self.Q[i, j] = max(self.D[i, j - 1] - self.new_gap_penalty - self.continue_gap_penalty, self.Q[i, j - 1] - self.continue_gap_penalty)
self.D[i, j] = max(match, self.P[i, j], self.Q[i, j], 0)
return self.D, self.P, self.Q

def matrix(self, querySequence: str, subjectSequence: str)->tuple[NDArray[float64], NDArray[float64], NDArray[float64]]:
D, P, Q = self(querySequence, subjectSequence)
return D, P, Q

def align(self, querySequence: str, subjectSequence: str)->str:
matrix, _, _ = self(querySequence, subjectSequence)

qs, ss = [x.upper() for x in querySequence], [x.upper() for x in subjectSequence]
if matrix.max() == 0:
return "There is no local alignment!"

#finds the largest value closest to bottom right of matrix
i, j = list(numpy.where(matrix == matrix.max()))
i, j = i[-1], j[-1]

subjectAlign = []
queryAlign= []
score = matrix.max()
while score > 0:
score = matrix[i][j]
if score == 0:
break
queryAlign.append(qs[i-1])
subjectAlign.append(ss[j-1])
i -= 1
j -= 1
queryAlign = "".join(queryAlign[::-1])
subjectAlign = "".join(subjectAlign[::-1])
return f"{queryAlign}\n{subjectAlign}"

def similarity(self, querySequence: str, subjectSequence: str)->float:
matrix, _, _ = self(querySequence, subjectSequence)
return matrix.max()

class Hirschberg():
def __init__(self, match_score: int = 1, mismatch_penalty: int = -1, gap_penalty: int = -2)->None:
self.match_score = match_score
Expand Down Expand Up @@ -737,6 +797,7 @@ def distance(self, querySequence: str, subjectSequence: str)->float:
longest_common_subsequence = Longest_Common_Subsequence()
shortest_common_supersequence = Shortest_Common_Supersequence()
gotoh = Gotoh()
gotoh_local = Gotoh_Local()

if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions limestone/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ def test_LCS():
def test_SCS():
[ls.shortest_common_supersequence.distance(a, b) for a, b in zip(test_strings1, test_strings2)]

def test_G():
[ls.gotoh.distance(a, b) for a, b in zip(test_strings1, test_strings2)]

def test_GL():
[ls.gotoh_local.distance(a, b) for a, b in zip(test_strings1, test_strings2)]

def main():
print("Each of the following tests creates a list comprehension of 100 sequences that are 50 to 100 charachters each")
print(f"Needleman Wunsch test: Time = {timeit('test_NW()', globals = globals(), number=1):0.4f}")
Expand All @@ -54,6 +60,8 @@ def main():
print(f"Jaro Winkler test: Time = {timeit('test_JW()', globals = globals(), number=1):0.4f}")
print(f"Longest Common Subsequence test: Time = {timeit('test_LCS()', globals = globals(), number=1):0.4f}")
print(f"Shortest Common Supersequence test: Time = {timeit('test_SCS()', globals = globals(), number=1):0.4f}")
print(f"Gotoh (Global) test: Time = {timeit('test_G()', globals = globals(), number=1):0.4f}")
print(f"Gotoh (Local) test: Time = {timeit('test_GL()', globals = globals(), number=1):0.4f}")

if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "limestone"
version = "0.12.0"
version = "0.13.0"
authors = [
{ name ="Andrew Hennis", email="[email protected]" },
]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
numpy>=1.26.4
numpy>=2.0.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='limestone',
version='0.12.0',
version='0.13.0',

packages=['limestone'],
package_data={'': ['*.py']},
Expand Down
72 changes: 72 additions & 0 deletions tests/test_GotohLocal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@

from __future__ import annotations
import unittest
from limestone import gotoh_local

class TestGotohLocal(unittest.TestCase):
def test_distance_diff(self):
dist = gotoh_local.distance("ACTG", "FHYU")
self.assertEqual(dist, 4.0)

def test_similarity_diff(self):
sim = gotoh_local.similarity("ACTG", "FHYU")
self.assertEqual(sim, 0.0)

def test_norm_distance_diff(self):
dist = gotoh_local.normalized_distance("ACTG", "FHYU")
self.assertEqual(dist, 1.0)

def test_norm_similarity_diff(self):
sim = gotoh_local.normalized_similarity("ACTG", "FHYU")
self.assertEqual(sim, 0.0)

def test_distance_sim(self):
dist = gotoh_local.distance("ACTG", "ACTG")
self.assertEqual(dist, 0.0)

def test_similarity_sim(self):
sim = gotoh_local.similarity("ACTG", "ACTG")
self.assertEqual(sim, 4.0)

def test_norm_distance_sim(self):
dist = gotoh_local.normalized_distance("ACTG", "ACTG")
self.assertEqual(dist, 0.0)

def test_norm_distance1(self):
dist = gotoh_local.normalized_distance("ACTG", "ACTF")
self.assertEqual(dist, 0.25)

def test_norm_distance2(self):
dist = gotoh_local.normalized_distance("ACTG", "ACFF")
self.assertEqual(dist, 0.5)

def test_norm_distance3(self):
dist = gotoh_local.normalized_distance("ACTG", "AFFF")
self.assertEqual(dist, 0.75)

def test_norm_similarity_sim(self):
sim = gotoh_local.normalized_similarity("ACTG", "ACTG")
self.assertEqual(sim, 1.0)

def test_norm_similarity1(self):
dist = gotoh_local.normalized_similarity("ACTG", "ACTF")
self.assertEqual(dist, 0.75)

def test_norm_similarity2(self):
dist = gotoh_local.normalized_similarity("ACTG", "ACFF")
self.assertEqual(dist, 0.5)

def test_norm_similarity3(self):
dist = gotoh_local.normalized_similarity("ACTG", "AFFF")
self.assertEqual(dist, 0.25)

def test_align(self):
alignment = gotoh_local.align("BA", "ABA")
self.assertEqual(alignment, "BA\nBA")

def test_align2(self):
alignment = gotoh_local.align("AGTACGCA", "TATGC")
self.assertEqual(alignment, "TACGC\nTATGC")

if __name__ == '__main__':
unittest.main()

0 comments on commit 7bc0969

Please sign in to comment.