Gotoh Local

lignum-vitae · Nov 15, 2024 · 7bc0969 · 7bc0969
1 parent e243e17
commit 7bc0969
Show file tree

Hide file tree

Showing 8 changed files with 157 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -11,6 +11,7 @@ This project contains several sequence alignment algorithms that can also produc
 | ------------------           | --------- | -------- | ------------------------------ |
 |Needleman-Wunsch              |    [x]    |    [x]   |               [x]              |
 |Gotoh (Global)                |    [x]    |    [x]   |               [x]              |
+|Gotoh (Local)                 |    [x]    |    [x]   |               [x]              |
 |Smith-Waterman                |    [x]    |    [x]   |               [x]              |
 |Waterman-Smith-Beyer          |    [x]    |    [x]   |               [x]              |
 |Wagner-Fischer                |    [x]    |    [x]   |               [x]              |
@@ -25,7 +26,9 @@ This project contains several sequence alignment algorithms that can also produc
 ## Algorithms Explained
 [Needleman-Wunsch](https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm)
 
-[Gotoh -Global](https://helios2.mi.parisdescartes.fr/~lomn/Cours/BI/Material/gap-penalty-gotoh.pdf)
+[Gotoh (Global)](https://helios2.mi.parisdescartes.fr/~lomn/Cours/BI/Material/gap-penalty-gotoh.pdf)
+
+[Gotoh (Local)](http://rna.informatik.uni-freiburg.de/Teaching/index.jsp?toolName=Gotoh%20(Local))
 
 [Smith-Waterman ](https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm)
 
@@ -97,7 +100,6 @@ print(needleman_wunsch.matrix("AFTG","ACTG"))
 
 # Work In Progress
 
-Jaro and Jaro-Winkler algorithms.
 Importing and parsing FASTA, FASTQ, and PDB files.
 
 # Caveats

diff --git a/limestone/__init__.py b/limestone/__init__.py
@@ -12,3 +12,4 @@
 from limestone.algorithms.editdistance import longest_common_subsequence
 from limestone.algorithms.editdistance import shortest_common_supersequence
 from limestone.algorithms.editdistance import gotoh
+from limestone.algorithms.editdistance import gotoh_local
diff --git a/limestone/algorithms/editdistance.py b/limestone/algorithms/editdistance.py
@@ -13,14 +13,11 @@
     raise ImportError("Please pip install all dependencies from requirements.txt!")
 
 def main():
-    qqs = "GCATGCCAT"
-    sss = "CATGCATCGAC"
-
-    print(gotoh.align(qqs, sss))
-    print(gotoh.distance(qqs, sss))
-    print(gotoh.normalized_distance(qqs, sss))
-    print(gotoh.similarity(qqs, sss))
+    qqs = "WHORUNSTHEWORLDGIRLS"
+    sss = "DOGSSTHEWORLDEXUBERENT"
 
+    print(gotoh_local(qqs, sss)) 
+    print(gotoh_local.align(qqs, sss)) 
 
 class Wagner_Fischer(__GLOBALBASE): #Levenshtein Distance
     def __init__(self)->None:
@@ -312,7 +309,6 @@ def __call__(self, querySequence: str, subjectSequence: str)->tuple[NDArray[floa
         qs.extend([x.upper() for x in querySequence])
         ss.extend([x.upper() for x in subjectSequence])
 
-        #LETS GIVE POSITIVES A TRY FOR THE TESTS
         #matrix initialisation
         self.D = numpy.full((len(qs),len(ss)), numpy.inf)
         self.P = numpy.full((len(qs), len(ss)), numpy.inf)
@@ -379,6 +375,70 @@ def align(self, querySequence: str, subjectSequence: str)->str:
 
         return f"{queryAlign}\n{subjectAlign}"
 
+class Gotoh_Local(__LOCALBASE):
+    def __init__(self, match_score:int = 1, mismatch_penalty:int = 1, new_gap_penalty:int = 2, continue_gap_penalty: int = 1)->None:
+        self.match_score = match_score
+        self.mismatch_penalty = mismatch_penalty
+        self.new_gap_penalty = new_gap_penalty
+        self.continue_gap_penalty = continue_gap_penalty
+
+    def __call__(self, querySequence: str, subjectSequence: str)->tuple[NDArray[float64],NDArray[float64],NDArray[float64]]:
+        qs,ss = [""], [""] 
+        qs.extend([x.upper() for x in querySequence])
+        ss.extend([x.upper() for x in subjectSequence])
+
+        #matrix initialisation
+        self.D = numpy.full((len(qs),len(ss)), -numpy.inf)
+        self.P = numpy.full((len(qs), len(ss)), -numpy.inf)
+        self.P[:,0] = 0
+        self.Q = numpy.full((len(qs), len(ss)), -numpy.inf)
+        self.Q[0,:] = 0
+        #initialisation of starter values for first column and first row
+        self.D[:,0] = 0
+        self.D[0,:] = 0
+
+        for i in range(1, len(qs)):
+          for j in range(1, len(ss)):
+              match = self.D[i - 1, j - 1] + (self.match_score if qs[i] == ss[j] else -self.mismatch_penalty)
+              self.P[i, j] = max(self.D[i - 1, j] - self.new_gap_penalty - self.continue_gap_penalty, self.P[i - 1, j] - self.continue_gap_penalty)
+              self.Q[i, j] = max(self.D[i, j - 1] - self.new_gap_penalty - self.continue_gap_penalty, self.Q[i, j - 1] - self.continue_gap_penalty)
+              self.D[i, j] = max(match, self.P[i, j], self.Q[i, j], 0)
+        return self.D, self.P, self.Q
+
+    def matrix(self, querySequence: str, subjectSequence: str)->tuple[NDArray[float64], NDArray[float64], NDArray[float64]]:
+        D, P, Q = self(querySequence, subjectSequence)
+        return D, P, Q
+
+    def align(self, querySequence: str, subjectSequence: str)->str:
+      matrix, _, _ = self(querySequence, subjectSequence)
+
+      qs, ss = [x.upper() for x in querySequence], [x.upper() for x in subjectSequence]
+      if matrix.max() == 0:
+        return "There is no local alignment!"
+
+      #finds the largest value closest to bottom right of matrix
+      i, j = list(numpy.where(matrix == matrix.max()))
+      i, j = i[-1], j[-1]
+
+      subjectAlign = []
+      queryAlign= []
+      score = matrix.max()
+      while score > 0:
+          score = matrix[i][j]
+          if score == 0:
+              break
+          queryAlign.append(qs[i-1])
+          subjectAlign.append(ss[j-1])
+          i -= 1
+          j -= 1
+      queryAlign = "".join(queryAlign[::-1])
+      subjectAlign = "".join(subjectAlign[::-1])
+      return f"{queryAlign}\n{subjectAlign}"
+
+    def similarity(self, querySequence: str, subjectSequence: str)->float:
+      matrix, _, _  = self(querySequence, subjectSequence)
+      return matrix.max()
+
 class Hirschberg():
     def __init__(self, match_score: int = 1, mismatch_penalty: int = -1, gap_penalty: int = -2)->None:
         self.match_score = match_score
@@ -737,6 +797,7 @@ def distance(self, querySequence: str, subjectSequence: str)->float:
 longest_common_subsequence = Longest_Common_Subsequence()
 shortest_common_supersequence = Shortest_Common_Supersequence()
 gotoh = Gotoh()
+gotoh_local = Gotoh_Local()
 
 if __name__ == "__main__":
     main()
diff --git a/limestone/benchmarks.py b/limestone/benchmarks.py
@@ -42,6 +42,12 @@ def test_LCS():
 def test_SCS():
     [ls.shortest_common_supersequence.distance(a, b) for a, b in zip(test_strings1, test_strings2)]
 
+def test_G():
+    [ls.gotoh.distance(a, b) for a, b in zip(test_strings1, test_strings2)]
+
+def test_GL():
+    [ls.gotoh_local.distance(a, b) for a, b in zip(test_strings1, test_strings2)]
+
 def main():
     print("Each of the following tests creates a list comprehension of 100 sequences that are 50 to 100 charachters each")
     print(f"Needleman Wunsch test: Time = {timeit('test_NW()', globals = globals(), number=1):0.4f}")
@@ -54,6 +60,8 @@ def main():
     print(f"Jaro Winkler test: Time = {timeit('test_JW()', globals = globals(), number=1):0.4f}")
     print(f"Longest Common Subsequence test: Time = {timeit('test_LCS()', globals = globals(), number=1):0.4f}")
     print(f"Shortest Common Supersequence test: Time = {timeit('test_SCS()', globals = globals(), number=1):0.4f}")
+    print(f"Gotoh (Global) test: Time = {timeit('test_G()', globals = globals(), number=1):0.4f}")
+    print(f"Gotoh (Local) test: Time = {timeit('test_GL()', globals = globals(), number=1):0.4f}")
 
 if __name__ == "__main__":
     main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "limestone"
-version = "0.12.0"
+version = "0.13.0"
 authors = [
 	{ name ="Andrew Hennis", email="[email protected]" },
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -1 +1 @@
-numpy>=1.26.4
+numpy>=2.0.0
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name='limestone',
-    version='0.12.0',
+    version='0.13.0',
 
     packages=['limestone'],
     package_data={'': ['*.py']},

diff --git a/tests/test_GotohLocal.py b/tests/test_GotohLocal.py
@@ -0,0 +1,72 @@
+
+from __future__ import annotations
+import unittest
+from limestone import gotoh_local
+
+class TestGotohLocal(unittest.TestCase):
+    def test_distance_diff(self):
+        dist = gotoh_local.distance("ACTG", "FHYU")
+        self.assertEqual(dist, 4.0)
+
+    def test_similarity_diff(self):
+        sim = gotoh_local.similarity("ACTG", "FHYU")
+        self.assertEqual(sim, 0.0)
+
+    def test_norm_distance_diff(self):
+        dist = gotoh_local.normalized_distance("ACTG", "FHYU")
+        self.assertEqual(dist, 1.0)
+
+    def test_norm_similarity_diff(self):
+        sim = gotoh_local.normalized_similarity("ACTG", "FHYU")
+        self.assertEqual(sim, 0.0)
+
+    def test_distance_sim(self):
+        dist = gotoh_local.distance("ACTG", "ACTG")
+        self.assertEqual(dist, 0.0)
+
+    def test_similarity_sim(self):
+        sim = gotoh_local.similarity("ACTG", "ACTG")
+        self.assertEqual(sim, 4.0)
+
+    def test_norm_distance_sim(self):
+        dist = gotoh_local.normalized_distance("ACTG", "ACTG")
+        self.assertEqual(dist, 0.0)
+
+    def test_norm_distance1(self):
+        dist = gotoh_local.normalized_distance("ACTG", "ACTF")
+        self.assertEqual(dist, 0.25)
+
+    def test_norm_distance2(self):
+        dist = gotoh_local.normalized_distance("ACTG", "ACFF")
+        self.assertEqual(dist, 0.5)
+
+    def test_norm_distance3(self):
+        dist = gotoh_local.normalized_distance("ACTG", "AFFF")
+        self.assertEqual(dist, 0.75)
+
+    def test_norm_similarity_sim(self):
+        sim = gotoh_local.normalized_similarity("ACTG", "ACTG")
+        self.assertEqual(sim, 1.0)
+
+    def test_norm_similarity1(self):
+        dist = gotoh_local.normalized_similarity("ACTG", "ACTF")
+        self.assertEqual(dist, 0.75)
+
+    def test_norm_similarity2(self):
+        dist = gotoh_local.normalized_similarity("ACTG", "ACFF")
+        self.assertEqual(dist, 0.5)
+
+    def test_norm_similarity3(self):
+        dist = gotoh_local.normalized_similarity("ACTG", "AFFF")
+        self.assertEqual(dist, 0.25) 
+
+    def test_align(self):
+        alignment = gotoh_local.align("BA", "ABA")
+        self.assertEqual(alignment, "BA\nBA")
+
+    def test_align2(self):
+        alignment = gotoh_local.align("AGTACGCA", "TATGC")
+        self.assertEqual(alignment, "TACGC\nTATGC")
+
+if __name__ == '__main__':
+    unittest.main()