Fix .rank() method for multiple models (#615)

The new Recommender.rank() signature adds a `k` argument, which breaks models whose own .rank() override does not accept `k` (e.g., ComparER, EFM, LRPPM). This commit updates .rank() in those models to accept `k` (default -1, meaning rank all items) and to support top-k ranking.

Fix .rank() method for multiple models (#615)
The new Recommender.rank() signature adds a `k` argument, which breaks models whose own .rank() override does not accept `k` (e.g., ComparER, EFM, LRPPM). This commit updates .rank() in those models to accept `k` (default -1, meaning rank all items) and to support top-k ranking.
ae7ba860 · Jaime Hieu Do · GitHub · cbdc8f24 · ae7ba860 · ae7ba860
Unverified Commit ae7ba860 authored 1 year ago by Jaime Hieu Do Committed by GitHub 1 year ago
--- a/cornac/models/comparer/recom_comparer_obj.pyx
+++ b/cornac/models/comparer/recom_comparer_obj.pyx
@@ -663,39 +663,51 @@ class ComparERObj(Recommender):
            item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
            return item_score
-    def rank(self, user_id, item_ids=None):
+    def rank(self, user_idx, item_indices=None, k=-1):
        """Rank all test items for a given user.
        Parameters
        ----------
-        user_id: int, required
+        user_idx: int, required
            The index of the user for whom to perform item raking.
-        item_ids: 1d array, optional, default: None
+        item_indices: 1d array, optional, default: None
            A list of candidate item indices to be ranked by the user.
            If `None`, list of ranked known item indices and their scores will be returned
+        k: int, required
+            Cut-off length for recommendations, k=-1 will return ranked list of all items.
+            This is more important for ANN to know the limit to avoid exhaustive ranking.
        Returns
        -------
-        Tuple of `item_rank`, and `item_scores`. The order of values
+        (ranked_items, item_scores): tuple
-        in item_scores are corresponding to the order of their ids in item_ids
+            `ranked_items` contains item indices being ranked by their scores.
+            `item_scores` contains scores of items corresponding to index in `item_indices` input.
        """
-        X_ = self.U1[user_id, :].dot(self.V.T)
+        X_ = self.U1[user_idx, :].dot(self.V.T)
        most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
        most_cared_X_ = X_[most_cared_aspects_indices]
        most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
        explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
-        item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id)
+        all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)
-        if item_ids is None:
+        # rank items based on their scores
-            item_scores = item_scores
+        item_indices = (
-            item_rank = item_scores.argsort()[::-1]
+            np.arange(self.num_items)
-        else:
+            if item_indices is None
-            num_items = max(self.num_items, max(item_ids) + 1)
+            else np.asarray(item_indices)
-            item_scores = np.ones(num_items) * np.min(item_scores)
+        )
-            item_scores[:self.num_items] = item_scores
+        item_scores = all_item_scores[item_indices]
-            item_rank = item_scores.argsort()[::-1]
-            item_rank = intersects(item_rank, item_ids, assume_unique=True)
+        if k != -1:  # O(n + k log k), faster for small k which is usually the case
-            item_scores = item_scores[item_ids]
+            partitioned_idx = np.argpartition(item_scores, -k)
-        return item_rank, item_scores
+            top_k_idx = partitioned_idx[-k:]
+            sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+            partitioned_idx[-k:] = sorted_top_k_idx
+            ranked_items = item_indices[partitioned_idx[::-1]]
+        else:  # O(n log n)
+            ranked_items = item_indices[item_scores.argsort()[::-1]]
+        return ranked_items, item_scores
\ No newline at end of file
--- a/cornac/models/comparer/recom_comparer_sub.pyx
+++ b/cornac/models/comparer/recom_comparer_sub.pyx
@@ -759,7 +759,7 @@ class ComparERSub(MTER):
        return correct, skipped, loss, bpr_loss
-    def rank(self, user_idx, item_indices=None):
+    def rank(self, user_idx, item_indices=None, k=-1):
        if self.alpha > 0 and self.n_top_aspects > 0:
            n_top_aspects = min(self.n_top_aspects, self.num_aspects)
            ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
@@ -786,12 +786,21 @@ class ComparERSub(MTER):
                all_item_scores[: self.num_items] = known_item_scores
            # rank items based on their scores
-            if item_indices is None:
+            item_indices = (
-                item_scores = all_item_scores[: self.num_items]
+                np.arange(self.num_items)
-                item_rank = item_scores.argsort()[::-1]
+                if item_indices is None
-            else:
+                else np.asarray(item_indices)
-                item_scores = all_item_scores[item_indices]
+            )
-                item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
+            item_scores = all_item_scores[item_indices]
-            return item_rank, item_scores
+            if k != -1:  # O(n + k log k), faster for small k which is usually the case
-        return super().rank(user_idx, item_indices)
+                partitioned_idx = np.argpartition(item_scores, -k)
\ No newline at end of file
+                top_k_idx = partitioned_idx[-k:]
+                sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+                partitioned_idx[-k:] = sorted_top_k_idx
+                ranked_items = item_indices[partitioned_idx[::-1]]
+            else:  # O(n log n)
+                ranked_items = item_indices[item_scores.argsort()[::-1]]
+            return ranked_items, item_scores
+        return super().rank(user_idx, item_indices, k)
\ No newline at end of file
--- a/cornac/models/efm/recom_efm.pyx
+++ b/cornac/models/efm/recom_efm.pyx
@@ -468,7 +468,7 @@ class EFM(Recommender):
            item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
            return item_score
-    def rank(self, user_idx, item_indices=None):
+    def rank(self, user_idx, item_indices=None, k=-1):
        """Rank all test items for a given user.
        Parameters
@@ -480,10 +480,15 @@ class EFM(Recommender):
            A list of candidate item indices to be ranked by the user.
            If `None`, list of ranked known item indices and their scores will be returned
+        k: int, required
+            Cut-off length for recommendations, k=-1 will return ranked list of all items.
+            This is more important for ANN to know the limit to avoid exhaustive ranking.
        Returns
        -------
-        Tuple of `item_rank`, and `item_scores`. The order of values
+        (ranked_items, item_scores): tuple
-        in item_scores are corresponding to the order of their ids in item_ids
+            `ranked_items` contains item indices being ranked by their scores.
+            `item_scores` contains scores of items corresponding to index in `item_indices` input.
        """
        X_ = self.U1[user_idx, :].dot(self.V.T)
@@ -504,11 +509,20 @@ class EFM(Recommender):
            all_item_scores[: self.num_items] = known_item_scores
        # rank items based on their scores
-        if item_indices is None:
+        item_indices = (
-            item_scores = all_item_scores[: self.num_items]
+            np.arange(self.num_items)
-            item_rank = item_scores.argsort()[::-1]
+            if item_indices is None
-        else:
+            else np.asarray(item_indices)
-            item_scores = all_item_scores[item_indices]
+        )
-            item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
+        item_scores = all_item_scores[item_indices]
-        return item_rank, item_scores
+        if k != -1:  # O(n + k log k), faster for small k which is usually the case
+            partitioned_idx = np.argpartition(item_scores, -k)
+            top_k_idx = partitioned_idx[-k:]
+            sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+            partitioned_idx[-k:] = sorted_top_k_idx
+            ranked_items = item_indices[partitioned_idx[::-1]]
+        else:  # O(n log n)
+            ranked_items = item_indices[item_scores.argsort()[::-1]]
+        return ranked_items, item_scores
--- a/cornac/models/lrppm/recom_lrppm.pyx
+++ b/cornac/models/lrppm/recom_lrppm.pyx
@@ -516,7 +516,7 @@ class LRPPM(Recommender):
            item_score = self.I[i_idx].dot(self.U[u_idx])
            return item_score
-    def rank(self, user_idx, item_indices=None):
+    def rank(self, user_idx, item_indices=None, k=-1):
        if self.alpha > 0 and self.num_top_aspects > 0:
            n_items = self.num_items
            num_top_aspects = min(self.num_top_aspects, self.num_aspects)
@@ -540,12 +540,21 @@ class LRPPM(Recommender):
                all_item_scores[: self.num_items] = known_item_scores
            # rank items based on their scores
-            if item_indices is None:
+            item_indices = (
-                item_scores = all_item_scores[: self.num_items]
+                np.arange(self.num_items)
-                item_rank = item_scores.argsort()[::-1]
+                if item_indices is None
-            else:
+                else np.asarray(item_indices)
-                item_scores = all_item_scores[item_indices]
+            )
-                item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
+            item_scores = all_item_scores[item_indices]
-            return item_rank, item_scores
+            if k != -1:  # O(n + k log k), faster for small k which is usually the case
-        return super().rank(user_idx, item_indices)
+                partitioned_idx = np.argpartition(item_scores, -k)
\ No newline at end of file
+                top_k_idx = partitioned_idx[-k:]
+                sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
+                partitioned_idx[-k:] = sorted_top_k_idx
+                ranked_items = item_indices[partitioned_idx[::-1]]
+            else:  # O(n log n)
+                ranked_items = item_indices[item_scores.argsort()[::-1]]
+            return ranked_items, item_scores
+        return super().rank(user_idx, item_indices, k)
\ No newline at end of file