Skip to content
Snippets Groups Projects
Unverified Commit ae7ba860 authored by Jaime Hieu Do's avatar Jaime Hieu Do Committed by GitHub
Browse files

Fix .rank() method for multiple models (#615)

The new Recommender.rank() function adds `k` as a required argument, which breaks some models that do not use `k` in ranking evaluation (e.g., ComparER, EFM, LRPPM).

This commit updates .rank() for the mentioned models to support the top-k option.
parent cbdc8f24
No related branches found
No related tags found
No related merge requests found
...@@ -663,39 +663,51 @@ class ComparERObj(Recommender): ...@@ -663,39 +663,51 @@ class ComparERObj(Recommender):
item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :]) item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
return item_score return item_score
def rank(self, user_id, item_ids=None): def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user. """Rank all test items for a given user.
Parameters Parameters
---------- ----------
user_id: int, required user_idx: int, required
The index of the user for whom to perform item ranking. The index of the user for whom to perform item ranking.
item_ids: 1d array, optional, default: None item_indices: 1d array, optional, default: None
A list of candidate item indices to be ranked by the user. A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned If `None`, list of ranked known item indices and their scores will be returned
k: int, optional, default: -1
Cut-off length for recommendations; k=-1 returns the ranked list of all items.
This matters mostly for ANN, which needs to know the limit to avoid exhaustive ranking.
Returns Returns
------- -------
Tuple of `item_rank`, and `item_scores`. The order of values (ranked_items, item_scores): tuple
in item_scores are corresponding to the order of their ids in item_ids `ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.
""" """
X_ = self.U1[user_id, :].dot(self.V.T) X_ = self.U1[user_idx, :].dot(self.V.T)
most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects] most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
most_cared_X_ = X_[most_cared_aspects_indices] most_cared_X_ = X_[most_cared_aspects_indices]
most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T) most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale) explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id) all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)
if item_ids is None: # rank items based on their scores
item_scores = item_scores item_indices = (
item_rank = item_scores.argsort()[::-1] np.arange(self.num_items)
else: if item_indices is None
num_items = max(self.num_items, max(item_ids) + 1) else np.asarray(item_indices)
item_scores = np.ones(num_items) * np.min(item_scores) )
item_scores[:self.num_items] = item_scores item_scores = all_item_scores[item_indices]
item_rank = item_scores.argsort()[::-1]
item_rank = intersects(item_rank, item_ids, assume_unique=True) if k != -1: # O(n + k log k), faster for small k which is usually the case
item_scores = item_scores[item_ids] partitioned_idx = np.argpartition(item_scores, -k)
return item_rank, item_scores top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]
return ranked_items, item_scores
\ No newline at end of file
...@@ -759,7 +759,7 @@ class ComparERSub(MTER): ...@@ -759,7 +759,7 @@ class ComparERSub(MTER):
return correct, skipped, loss, bpr_loss return correct, skipped, loss, bpr_loss
def rank(self, user_idx, item_indices=None): def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.n_top_aspects > 0: if self.alpha > 0 and self.n_top_aspects > 0:
n_top_aspects = min(self.n_top_aspects, self.num_aspects) n_top_aspects = min(self.n_top_aspects, self.num_aspects)
ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx]) ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
...@@ -786,12 +786,21 @@ class ComparERSub(MTER): ...@@ -786,12 +786,21 @@ class ComparERSub(MTER):
all_item_scores[: self.num_items] = known_item_scores all_item_scores[: self.num_items] = known_item_scores
# rank items based on their scores # rank items based on their scores
if item_indices is None: item_indices = (
item_scores = all_item_scores[: self.num_items] np.arange(self.num_items)
item_rank = item_scores.argsort()[::-1] if item_indices is None
else: else np.asarray(item_indices)
item_scores = all_item_scores[item_indices] )
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] item_scores = all_item_scores[item_indices]
return item_rank, item_scores if k != -1: # O(n + k log k), faster for small k which is usually the case
return super().rank(user_idx, item_indices) partitioned_idx = np.argpartition(item_scores, -k)
\ No newline at end of file top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]
return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)
\ No newline at end of file
...@@ -468,7 +468,7 @@ class EFM(Recommender): ...@@ -468,7 +468,7 @@ class EFM(Recommender):
item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :]) item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
return item_score return item_score
def rank(self, user_idx, item_indices=None): def rank(self, user_idx, item_indices=None, k=-1):
"""Rank all test items for a given user. """Rank all test items for a given user.
Parameters Parameters
...@@ -480,10 +480,15 @@ class EFM(Recommender): ...@@ -480,10 +480,15 @@ class EFM(Recommender):
A list of candidate item indices to be ranked by the user. A list of candidate item indices to be ranked by the user.
If `None`, list of ranked known item indices and their scores will be returned If `None`, list of ranked known item indices and their scores will be returned
k: int, optional, default: -1
Cut-off length for recommendations; k=-1 returns the ranked list of all items.
This matters mostly for ANN, which needs to know the limit to avoid exhaustive ranking.
Returns Returns
------- -------
Tuple of `item_rank`, and `item_scores`. The order of values (ranked_items, item_scores): tuple
in item_scores are corresponding to the order of their ids in item_ids `ranked_items` contains item indices being ranked by their scores.
`item_scores` contains scores of items corresponding to index in `item_indices` input.
""" """
X_ = self.U1[user_idx, :].dot(self.V.T) X_ = self.U1[user_idx, :].dot(self.V.T)
...@@ -504,11 +509,20 @@ class EFM(Recommender): ...@@ -504,11 +509,20 @@ class EFM(Recommender):
all_item_scores[: self.num_items] = known_item_scores all_item_scores[: self.num_items] = known_item_scores
# rank items based on their scores # rank items based on their scores
if item_indices is None: item_indices = (
item_scores = all_item_scores[: self.num_items] np.arange(self.num_items)
item_rank = item_scores.argsort()[::-1] if item_indices is None
else: else np.asarray(item_indices)
item_scores = all_item_scores[item_indices] )
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] item_scores = all_item_scores[item_indices]
return item_rank, item_scores if k != -1: # O(n + k log k), faster for small k which is usually the case
partitioned_idx = np.argpartition(item_scores, -k)
top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]
return ranked_items, item_scores
...@@ -516,7 +516,7 @@ class LRPPM(Recommender): ...@@ -516,7 +516,7 @@ class LRPPM(Recommender):
item_score = self.I[i_idx].dot(self.U[u_idx]) item_score = self.I[i_idx].dot(self.U[u_idx])
return item_score return item_score
def rank(self, user_idx, item_indices=None): def rank(self, user_idx, item_indices=None, k=-1):
if self.alpha > 0 and self.num_top_aspects > 0: if self.alpha > 0 and self.num_top_aspects > 0:
n_items = self.num_items n_items = self.num_items
num_top_aspects = min(self.num_top_aspects, self.num_aspects) num_top_aspects = min(self.num_top_aspects, self.num_aspects)
...@@ -540,12 +540,21 @@ class LRPPM(Recommender): ...@@ -540,12 +540,21 @@ class LRPPM(Recommender):
all_item_scores[: self.num_items] = known_item_scores all_item_scores[: self.num_items] = known_item_scores
# rank items based on their scores # rank items based on their scores
if item_indices is None: item_indices = (
item_scores = all_item_scores[: self.num_items] np.arange(self.num_items)
item_rank = item_scores.argsort()[::-1] if item_indices is None
else: else np.asarray(item_indices)
item_scores = all_item_scores[item_indices] )
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]] item_scores = all_item_scores[item_indices]
return item_rank, item_scores if k != -1: # O(n + k log k), faster for small k which is usually the case
return super().rank(user_idx, item_indices) partitioned_idx = np.argpartition(item_scores, -k)
\ No newline at end of file top_k_idx = partitioned_idx[-k:]
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
partitioned_idx[-k:] = sorted_top_k_idx
ranked_items = item_indices[partitioned_idx[::-1]]
else: # O(n log n)
ranked_items = item_indices[item_scores.argsort()[::-1]]
return ranked_items, item_scores
return super().rank(user_idx, item_indices, k)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment