From 47419989e0988acce608fb54ba21aa14d662fff3 Mon Sep 17 00:00:00 2001
From: Oleg <odemidenko@gmail.com>
Date: Tue, 30 Jan 2018 16:17:02 +0300
Subject: [PATCH 1/2] fixed kNN for kNNBasic on issue #131

---
 surprise/prediction_algorithms/algo_base.py |  83 --------
 surprise/prediction_algorithms/knns.py      | 212 ++++++++++++++++++--
 2 files changed, 196 insertions(+), 99 deletions(-)

diff --git a/surprise/prediction_algorithms/algo_base.py b/surprise/prediction_algorithms/algo_base.py
index 844cb44e..4c0e8573 100644
--- a/surprise/prediction_algorithms/algo_base.py
+++ b/surprise/prediction_algorithms/algo_base.py
@@ -10,7 +10,6 @@ class :class:`AlgoBase` from which every single prediction algorithm has to
 
 from six import get_unbound_function as guf
 
-from .. import similarities as sims
 from .predictions import PredictionImpossible
 from .predictions import Prediction
 from .optimize_baselines import baseline_als
@@ -31,9 +30,6 @@ class AlgoBase(object):
     def __init__(self, **kwargs):
 
         self.bsl_options = kwargs.get('bsl_options', {})
-        self.sim_options = kwargs.get('sim_options', {})
-        if 'user_based' not in self.sim_options:
-            self.sim_options['user_based'] = True
         self.skip_train = False
 
         if (guf(self.__class__.fit) is guf(AlgoBase.fit) and
@@ -248,82 +244,3 @@ def compute_baselines(self):
                              ' for baseline computation.' +
                              ' Available methods are als and sgd.')
 
-    def compute_similarities(self):
-        """Build the similarity matrix.
-
-        The way the similarity matrix is computed depends on the
-        ``sim_options`` parameter passed at the creation of the algorithm (see
-        :ref:`similarity_measures_configuration`).
-
-        This method is only relevant for algorithms using a similarity measure,
-        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
-
-        Returns:
-            The similarity matrix."""
-
-        construction_func = {'cosine': sims.cosine,
-                             'msd': sims.msd,
-                             'pearson': sims.pearson,
-                             'pearson_baseline': sims.pearson_baseline}
-
-        if self.sim_options['user_based']:
-            n_x, yr = self.trainset.n_users, self.trainset.ir
-        else:
-            n_x, yr = self.trainset.n_items, self.trainset.ur
-
-        min_support = self.sim_options.get('min_support', 1)
-
-        args = [n_x, yr, min_support]
-
-        name = self.sim_options.get('name', 'msd').lower()
-        if name == 'pearson_baseline':
-            shrinkage = self.sim_options.get('shrinkage', 100)
-            bu, bi = self.compute_baselines()
-            if self.sim_options['user_based']:
-                bx, by = bu, bi
-            else:
-                bx, by = bi, bu
-
-            args += [self.trainset.global_mean, bx, by, shrinkage]
-
-        try:
-            print('Computing the {0} similarity matrix...'.format(name))
-            sim = construction_func[name](*args)
-            print('Done computing similarity matrix.')
-            return sim
-        except KeyError:
-            raise NameError('Wrong sim name ' + name + '. Allowed values ' +
-                            'are ' + ', '.join(construction_func.keys()) + '.')
-
-    def get_neighbors(self, iid, k):
-        """Return the ``k`` nearest neighbors of ``iid``, which is the inner id
-        of a user or an item, depending on the ``user_based`` field of
-        ``sim_options`` (see :ref:`similarity_measures_configuration`).
-
-        As the similarities are computed on the basis of a similarity measure,
-        this method is only relevant for algorithms using a similarity measure,
-        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
-
-        For a usage example, see the :ref:`FAQ <get_k_nearest_neighbors>`.
-
-        Args:
-            iid(int): The (inner) id of the user (or item) for which we want
-                the nearest neighbors. See :ref:`this note<raw_inner_note>`.
-
-            k(int): The number of neighbors to retrieve.
-
-        Returns:
-            The list of the ``k`` (inner) ids of the closest users (or items)
-            to ``iid``.
-        """
-
-        if self.sim_options['user_based']:
-            all_instances = self.trainset.all_users
-        else:
-            all_instances = self.trainset.all_items
-
-        others = [(x, self.sim[iid, x]) for x in all_instances() if x != iid]
-        others.sort(key=lambda tple: tple[1], reverse=True)
-        k_nearest_neighbors = [j for (j, _) in others[:k]]
-
-        return k_nearest_neighbors
diff --git a/surprise/prediction_algorithms/knns.py b/surprise/prediction_algorithms/knns.py
index 069da4d3..9fea1dcd 100644
--- a/surprise/prediction_algorithms/knns.py
+++ b/surprise/prediction_algorithms/knns.py
@@ -7,7 +7,9 @@
 import numpy as np
 from six import iteritems
 import heapq
+from collections import defaultdict
 
+from .. import similarities as sims
 from .predictions import PredictionImpossible
 from .algo_base import AlgoBase
 
@@ -25,11 +27,32 @@ class SymmetricAlgo(AlgoBase):
 
     When the algo is user-based x denotes a user and y an item. Else, it's
     reversed.
+
+    Args:
+        sim_options(dict, optional): Parameters for similarity metrics,
+            used to define nearest neighbours.
+            See :ref:`similarity_measures_configuration` for usage.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, sim_options={}, **kwargs):
+    def __init__(self, sim_options={}, fix_k_neighbors=False, **kwargs):
+
+        AlgoBase.__init__(self, **kwargs)
 
-        AlgoBase.__init__(self, sim_options=sim_options, **kwargs)
+        self.sim_options = sim_options
+        if 'user_based' not in self.sim_options:
+            self.sim_options['user_based'] = True
+        self.fix_k_neighbors = fix_k_neighbors
 
     def fit(self, trainset):
 
@@ -51,6 +74,100 @@ def switch(self, u_stuff, i_stuff):
         else:
             return i_stuff, u_stuff
 
+    def select_k_neighbors(self):
+
+        k_neighbors=defaultdict(list)
+
+        n=self.sim.shape[1]
+        # find indexes of k+1 closest neighbors (as one of them is the element
+        # itself)
+        k_plus_neighbors=np.argpartition(self.sim,n-self.k-1)[:,n-self.k-1:]
+
+        for i,neighbors in enumerate(k_plus_neighbors):
+            k_neighbors[i] = [(j,self.sim[i,j]) for j in neighbors if j!=i]
+
+        self.kNN = k_neighbors
+
+    def compute_similarities(self):
+        """Build the similarity matrix.
+
+        The way the similarity matrix is computed depends on the
+        ``sim_options`` parameter passed at the creation of the algorithm (see
+        :ref:`similarity_measures_configuration`).
+
+        This method is only relevant for algorithms using a similarity measure,
+        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
+
+        Returns:
+            The similarity matrix."""
+
+        construction_func = {'cosine': sims.cosine,
+                             'msd': sims.msd,
+                             'pearson': sims.pearson,
+                             'pearson_baseline': sims.pearson_baseline}
+
+        if self.sim_options['user_based']:
+            n_x, yr = self.trainset.n_users, self.trainset.ir
+        else:
+            n_x, yr = self.trainset.n_items, self.trainset.ur
+
+        min_support = self.sim_options.get('min_support', 1)
+
+        args = [n_x, yr, min_support]
+
+        name = self.sim_options.get('name', 'msd').lower()
+        if name == 'pearson_baseline':
+            shrinkage = self.sim_options.get('shrinkage', 100)
+            bu, bi = self.compute_baselines()
+            if self.sim_options['user_based']:
+                bx, by = bu, bi
+            else:
+                bx, by = bi, bu
+
+            args += [self.trainset.global_mean, bx, by, shrinkage]
+
+        try:
+            print('Computing the {0} similarity matrix...'.format(name))
+            sim = construction_func[name](*args)
+            print('Done computing similarity matrix.')
+            return sim
+        except KeyError:
+            raise NameError('Wrong sim name ' + name + '. Allowed values ' +
+                            'are ' + ', '.join(construction_func.keys()) + '.')
+
+    def get_neighbors(self, iid, k):
+        """Return the ``k`` nearest neighbors of ``iid``, which is the inner id
+        of a user or an item, depending on the ``user_based`` field of
+        ``sim_options`` (see :ref:`similarity_measures_configuration`).
+
+        As the similarities are computed on the basis of a similarity measure,
+        this method is only relevant for algorithms using a similarity measure,
+        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
+
+        For a usage example, see the :ref:`FAQ <get_k_nearest_neighbors>`.
+
+        Args:
+            iid(int): The (inner) id of the user (or item) for which we want
+                the nearest neighbors. See :ref:`this note<raw_inner_note>`.
+
+            k(int): The number of neighbors to retrieve.
+
+        Returns:
+            The list of the ``k`` (inner) ids of the closest users (or items)
+            to ``iid``.
+        """
+
+        if self.sim_options['user_based']:
+            all_instances = self.trainset.all_users
+        else:
+            all_instances = self.trainset.all_items
+
+        others = [(x, self.sim[iid, x]) for x in all_instances() if x != iid]
+        others.sort(key=lambda tple: tple[1], reverse=True)
+        k_nearest_neighbors = [j for (j, _) in others[:k]]
+
+        return k_nearest_neighbors
+
 
 class KNNBasic(SymmetricAlgo):
     """A basic collaborative filtering algorithm.
@@ -81,11 +198,24 @@ class KNNBasic(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={},
+                 fix_k_neighbors=False, **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
         self.k = k
         self.min_k = min_k
 
@@ -94,6 +224,10 @@ def fit(self, trainset):
         SymmetricAlgo.fit(self, trainset)
         self.sim = self.compute_similarities()
 
+        if self.fix_k_neighbors:
+            self.select_k_neighbors()
+            del self.sim
+
         return self
 
     def estimate(self, u, i):
@@ -101,15 +235,22 @@ def estimate(self, u, i):
         if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
             raise PredictionImpossible('User and/or item is unkown.')
 
-        x, y = self.switch(u, i)
-
-        neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
-        k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
+        if self.fix_k_neighbors:
+            k_neighbors=[]
+            x, y = self.switch(u, i)
+            for (x2, r) in self.yr[y]:
+                for t in self.kNN[x]:
+                    if x2 == t[0]:
+                        k_neighbors.append((t[1], r))
+        else:
+            x, y = self.switch(u, i)
+            neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
+            k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
 
         # compute weighted average
         sum_sim = sum_ratings = actual_k = 0
         for (sim, r) in k_neighbors:
-            if sim > 0:
+            if sim > 0 and r!=0:
                 sum_sim += sim
                 sum_ratings += sim * r
                 actual_k += 1
@@ -156,11 +297,24 @@ class KNNWithMeans(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={}, fix_k_neighbors=False,
+                 **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
 
         self.k = k
         self.min_k = min_k
@@ -251,13 +405,25 @@ class KNNBaseline(SymmetricAlgo):
         bsl_options(dict): A dictionary of options for the baseline estimates
             computation. See :ref:`baseline_estimates_configuration` for
             accepted options.
-
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, bsl_options={}):
+    def __init__(self, k=40, min_k=1, sim_options={}, bsl_options={},
+                 fix_k_neighbors=False):
 
         SymmetricAlgo.__init__(self, sim_options=sim_options,
-                               bsl_options=bsl_options)
+                               bsl_options=bsl_options,
+                               fix_k_neighbors=fix_k_neighbors)
 
         self.k = k
         self.min_k = min_k
@@ -342,15 +508,29 @@ class KNNWithZScore(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={}, fix_k_neighbors=False,
+                 **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
 
         self.k = k
         self.min_k = min_k
 
+
     def fit(self, trainset):
 
         SymmetricAlgo.fit(self, trainset)

From 56311f07b3169c5ec96e005b110220ffabae2946 Mon Sep 17 00:00:00 2001
From: Oleg <odemidenko@gmail.com>
Date: Tue, 30 Jan 2018 16:17:02 +0300
Subject: [PATCH 2/2] fixed kNN for kNNBasic on issue #131

---
 surprise/prediction_algorithms/algo_base.py |  84 --------
 surprise/prediction_algorithms/knns.py      | 217 ++++++++++++++++++--
 2 files changed, 198 insertions(+), 103 deletions(-)

diff --git a/surprise/prediction_algorithms/algo_base.py b/surprise/prediction_algorithms/algo_base.py
index 844cb44e..8e773d7e 100644
--- a/surprise/prediction_algorithms/algo_base.py
+++ b/surprise/prediction_algorithms/algo_base.py
@@ -10,7 +10,6 @@ class :class:`AlgoBase` from which every single prediction algorithm has to
 
 from six import get_unbound_function as guf
 
-from .. import similarities as sims
 from .predictions import PredictionImpossible
 from .predictions import Prediction
 from .optimize_baselines import baseline_als
@@ -31,9 +30,6 @@ class AlgoBase(object):
     def __init__(self, **kwargs):
 
         self.bsl_options = kwargs.get('bsl_options', {})
-        self.sim_options = kwargs.get('sim_options', {})
-        if 'user_based' not in self.sim_options:
-            self.sim_options['user_based'] = True
         self.skip_train = False
 
         if (guf(self.__class__.fit) is guf(AlgoBase.fit) and
@@ -247,83 +243,3 @@ def compute_baselines(self):
             raise ValueError('Invalid method ' + method_name +
                              ' for baseline computation.' +
                              ' Available methods are als and sgd.')
-
-    def compute_similarities(self):
-        """Build the similarity matrix.
-
-        The way the similarity matrix is computed depends on the
-        ``sim_options`` parameter passed at the creation of the algorithm (see
-        :ref:`similarity_measures_configuration`).
-
-        This method is only relevant for algorithms using a similarity measure,
-        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
-
-        Returns:
-            The similarity matrix."""
-
-        construction_func = {'cosine': sims.cosine,
-                             'msd': sims.msd,
-                             'pearson': sims.pearson,
-                             'pearson_baseline': sims.pearson_baseline}
-
-        if self.sim_options['user_based']:
-            n_x, yr = self.trainset.n_users, self.trainset.ir
-        else:
-            n_x, yr = self.trainset.n_items, self.trainset.ur
-
-        min_support = self.sim_options.get('min_support', 1)
-
-        args = [n_x, yr, min_support]
-
-        name = self.sim_options.get('name', 'msd').lower()
-        if name == 'pearson_baseline':
-            shrinkage = self.sim_options.get('shrinkage', 100)
-            bu, bi = self.compute_baselines()
-            if self.sim_options['user_based']:
-                bx, by = bu, bi
-            else:
-                bx, by = bi, bu
-
-            args += [self.trainset.global_mean, bx, by, shrinkage]
-
-        try:
-            print('Computing the {0} similarity matrix...'.format(name))
-            sim = construction_func[name](*args)
-            print('Done computing similarity matrix.')
-            return sim
-        except KeyError:
-            raise NameError('Wrong sim name ' + name + '. Allowed values ' +
-                            'are ' + ', '.join(construction_func.keys()) + '.')
-
-    def get_neighbors(self, iid, k):
-        """Return the ``k`` nearest neighbors of ``iid``, which is the inner id
-        of a user or an item, depending on the ``user_based`` field of
-        ``sim_options`` (see :ref:`similarity_measures_configuration`).
-
-        As the similarities are computed on the basis of a similarity measure,
-        this method is only relevant for algorithms using a similarity measure,
-        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
-
-        For a usage example, see the :ref:`FAQ <get_k_nearest_neighbors>`.
-
-        Args:
-            iid(int): The (inner) id of the user (or item) for which we want
-                the nearest neighbors. See :ref:`this note<raw_inner_note>`.
-
-            k(int): The number of neighbors to retrieve.
-
-        Returns:
-            The list of the ``k`` (inner) ids of the closest users (or items)
-            to ``iid``.
-        """
-
-        if self.sim_options['user_based']:
-            all_instances = self.trainset.all_users
-        else:
-            all_instances = self.trainset.all_items
-
-        others = [(x, self.sim[iid, x]) for x in all_instances() if x != iid]
-        others.sort(key=lambda tple: tple[1], reverse=True)
-        k_nearest_neighbors = [j for (j, _) in others[:k]]
-
-        return k_nearest_neighbors
diff --git a/surprise/prediction_algorithms/knns.py b/surprise/prediction_algorithms/knns.py
index 069da4d3..1ab03afc 100644
--- a/surprise/prediction_algorithms/knns.py
+++ b/surprise/prediction_algorithms/knns.py
@@ -7,14 +7,16 @@
 import numpy as np
 from six import iteritems
 import heapq
+from collections import defaultdict
 
+from .. import similarities as sims
 from .predictions import PredictionImpossible
 from .algo_base import AlgoBase
 
 
 # Important note: as soon as an algorithm uses a similarity measure, it should
 # also allow the bsl_options parameter because of the pearson_baseline
-# similarity. It can be done explicitely (e.g. KNNBaseline), or implicetely
+# similarity. It can be done explicitly (e.g. KNNBaseline), or implicitly
 # using kwargs (e.g. KNNBasic).
 
 class SymmetricAlgo(AlgoBase):
@@ -25,11 +27,32 @@ class SymmetricAlgo(AlgoBase):
 
     When the algo is user-based x denotes a user and y an item. Else, it's
     reversed.
+
+    Args:
+        sim_options(dict, optional): Parameters for similarity metrics,
+            used to define nearest neighbours.
+            See :ref:`similarity_measures_configuration` for usage.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, sim_options={}, **kwargs):
+    def __init__(self, sim_options={}, fix_k_neighbors=False, **kwargs):
+
+        AlgoBase.__init__(self, **kwargs)
 
-        AlgoBase.__init__(self, sim_options=sim_options, **kwargs)
+        self.sim_options = sim_options
+        if 'user_based' not in self.sim_options:
+            self.sim_options['user_based'] = True
+        self.fix_k_neighbors = fix_k_neighbors
 
     def fit(self, trainset):
 
@@ -51,6 +74,100 @@ def switch(self, u_stuff, i_stuff):
         else:
             return i_stuff, u_stuff
 
+    def select_k_neighbors(self):
+
+        k_neighbors = defaultdict(list)
+
+        n = self.sim.shape[1]
+        # find indexes of k+1 closest neighbors (as one of them is the element
+        # itself)
+        k_plus_neighbors = (np.argpartition(self.sim, n - self.k - 1)
+                            [:, n - self.k - 1:])
+
+        for i, neighbors in enumerate(k_plus_neighbors):
+            k_neighbors[i] = [(j, self.sim[i, j]) for j in neighbors if j != i]
+
+        self.kNN = k_neighbors
+
+    def compute_similarities(self):
+        """Build the similarity matrix.
+
+        The way the similarity matrix is computed depends on the
+        ``sim_options`` parameter passed at the creation of the algorithm (see
+        :ref:`similarity_measures_configuration`).
+
+        This method is only relevant for algorithms using a similarity measure,
+        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
+
+        Returns:
+            The similarity matrix."""
+
+        construction_func = {'cosine': sims.cosine, 'msd': sims.msd,
+                             'pearson': sims.pearson,
+                             'pearson_baseline': sims.pearson_baseline}
+
+        if self.sim_options['user_based']:
+            n_x, yr = self.trainset.n_users, self.trainset.ir
+        else:
+            n_x, yr = self.trainset.n_items, self.trainset.ur
+
+        min_support = self.sim_options.get('min_support', 1)
+
+        args = [n_x, yr, min_support]
+
+        name = self.sim_options.get('name', 'msd').lower()
+        if name == 'pearson_baseline':
+            shrinkage = self.sim_options.get('shrinkage', 100)
+            bu, bi = self.compute_baselines()
+            if self.sim_options['user_based']:
+                bx, by = bu, bi
+            else:
+                bx, by = bi, bu
+
+            args += [self.trainset.global_mean, bx, by, shrinkage]
+
+        try:
+            print('Computing the {0} similarity matrix...'.format(name))
+            sim = construction_func[name](*args)
+            print('Done computing similarity matrix.')
+            return sim
+        except KeyError:
+            raise NameError('Wrong sim name ' + name + '. Allowed values ' +
+                            'are ' + ', '.join(construction_func.keys()) + '.')
+
+    def get_neighbors(self, iid, k):
+        """Return the ``k`` nearest neighbors of ``iid``, which is the inner id
+        of a user or an item, depending on the ``user_based`` field of
+        ``sim_options`` (see :ref:`similarity_measures_configuration`).
+
+        As the similarities are computed on the basis of a similarity measure,
+        this method is only relevant for algorithms using a similarity measure,
+        such as the :ref:`k-NN algorithms <pred_package_knn_inpired>`.
+
+        For a usage example, see the :ref:`FAQ <get_k_nearest_neighbors>`.
+
+        Args:
+            iid(int): The (inner) id of the user (or item) for which we want
+                the nearest neighbors. See :ref:`this note<raw_inner_note>`.
+
+            k(int): The number of neighbors to retrieve.
+
+        Returns:
+            The list of the ``k`` (inner) ids of the closest users (or items)
+            to ``iid``.
+        """
+
+        if self.sim_options['user_based']:
+            all_instances = self.trainset.all_users
+        else:
+            all_instances = self.trainset.all_items
+
+        others = [(x, self.sim[iid, x]) for x in all_instances() if x != iid]
+        others.sort(key=lambda tple: tple[1], reverse=True)
+        k_nearest_neighbors = [j for (j, _) in others[:k]]
+
+        return k_nearest_neighbors
+
 
 class KNNBasic(SymmetricAlgo):
     """A basic collaborative filtering algorithm.
@@ -81,11 +198,24 @@ class KNNBasic(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Defines whether k neighbours used to
+            predict similarity are fixed, i.e. same neighbours are used for any
+            item, or k neighbours are obtained for each item as the closest
+            neighbours among those who actually rated this item (for user_based
+            similarity) or among all similar items (for item-item similarity).
+            Default = False
+            - For item-item kNN - usual strategy is to obtain k
+            closest items, among all items rated by current user (use default)
+            - For user_based kNN - to obtain some kind of prediction for
+            a greater number of items use non-fixed neighbors (default).
+            To obtain consistent ranking among top-N predictions - use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={}, fix_k_neighbors=False,
+                 **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
         self.k = k
         self.min_k = min_k
 
@@ -94,6 +224,10 @@ def fit(self, trainset):
         SymmetricAlgo.fit(self, trainset)
         self.sim = self.compute_similarities()
 
+        if self.fix_k_neighbors:
+            self.select_k_neighbors()
+            del self.sim
+
         return self
 
     def estimate(self, u, i):
@@ -101,15 +235,22 @@ def estimate(self, u, i):
         if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
             raise PredictionImpossible('User and/or item is unkown.')
 
-        x, y = self.switch(u, i)
-
-        neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
-        k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
+        if self.fix_k_neighbors:
+            k_neighbors = []
+            x, y = self.switch(u, i)
+            for (x2, r) in self.yr[y]:
+                for t in self.kNN[x]:
+                    if x2 == t[0]:
+                        k_neighbors.append((t[1], r))
+        else:
+            x, y = self.switch(u, i)
+            neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
+            k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])
 
         # compute weighted average
         sum_sim = sum_ratings = actual_k = 0
         for (sim, r) in k_neighbors:
-            if sim > 0:
+            if sim > 0 and r != 0:
                 sum_sim += sim
                 sum_ratings += sim * r
                 actual_k += 1
@@ -156,11 +297,24 @@ class KNNWithMeans(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Whether the k neighbours used for a
+            prediction are fixed, i.e. the same neighbours are used for every
+            item, or are selected for each item as the closest neighbours
+            among those who actually rated this item (for user-based
+            similarity) or among all similar items (for item-item similarity).
+            Default is ``False``.
+            - For item-item kNN, the usual strategy is to take the k closest
+            items among all items rated by the current user (the default).
+            - For user-based kNN, use non-fixed neighbours (the default) to
+            obtain some kind of prediction for a greater number of items.
+            To get a consistent ranking among top-N predictions, use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={}, fix_k_neighbors=False,
+                 **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
 
         self.k = k
         self.min_k = min_k
@@ -251,13 +405,25 @@ class KNNBaseline(SymmetricAlgo):
         bsl_options(dict): A dictionary of options for the baseline estimates
             computation. See :ref:`baseline_estimates_configuration` for
             accepted options.
-
+        fix_k_neighbors(boolean): Whether the k neighbours used for a
+            prediction are fixed, i.e. the same neighbours are used for every
+            item, or are selected for each item as the closest neighbours
+            among those who actually rated this item (for user-based
+            similarity) or among all similar items (for item-item similarity).
+            Default is ``False``.
+            - For item-item kNN, the usual strategy is to take the k closest
+            items among all items rated by the current user (the default).
+            - For user-based kNN, use non-fixed neighbours (the default) to
+            obtain some kind of prediction for a greater number of items.
+            To get a consistent ranking among top-N predictions, use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, bsl_options={}):
+    def __init__(self, k=40, min_k=1, sim_options={}, bsl_options={},
+                 fix_k_neighbors=False):
 
         SymmetricAlgo.__init__(self, sim_options=sim_options,
-                               bsl_options=bsl_options)
+                               bsl_options=bsl_options,
+                               fix_k_neighbors=fix_k_neighbors)
 
         self.k = k
         self.min_k = min_k
@@ -342,11 +508,24 @@ class KNNWithZScore(SymmetricAlgo):
         sim_options(dict): A dictionary of options for the similarity
             measure. See :ref:`similarity_measures_configuration` for accepted
             options.
+        fix_k_neighbors(boolean): Whether the k neighbours used for a
+            prediction are fixed, i.e. the same neighbours are used for every
+            item, or are selected for each item as the closest neighbours
+            among those who actually rated this item (for user-based
+            similarity) or among all similar items (for item-item similarity).
+            Default is ``False``.
+            - For item-item kNN, the usual strategy is to take the k closest
+            items among all items rated by the current user (the default).
+            - For user-based kNN, use non-fixed neighbours (the default) to
+            obtain some kind of prediction for a greater number of items.
+            To get a consistent ranking among top-N predictions, use fixed k.
     """
 
-    def __init__(self, k=40, min_k=1, sim_options={}, **kwargs):
+    def __init__(self, k=40, min_k=1, sim_options={}, fix_k_neighbors=False,
+                 **kwargs):
 
-        SymmetricAlgo.__init__(self, sim_options=sim_options, **kwargs)
+        SymmetricAlgo.__init__(self, sim_options=sim_options,
+                               fix_k_neighbors=fix_k_neighbors, **kwargs)
 
         self.k = k
         self.min_k = min_k
@@ -358,8 +537,8 @@ def fit(self, trainset):
         self.means = np.zeros(self.n_x)
         self.sigmas = np.zeros(self.n_x)
         # when certain sigma is 0, use overall sigma
-        self.overall_sigma = np.std([r for (_, _, r)
-                                     in self.trainset.all_ratings()])
+        self.overall_sigma = np.std(
+            [r for (_, _, r) in self.trainset.all_ratings()])
 
         for x, ratings in iteritems(self.xr):
             self.means[x] = np.mean([r for (_, r) in ratings])