1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
.idea
*~
*#*

70 changes: 70 additions & 0 deletions spotlight/evaluation.py
@@ -242,3 +242,73 @@ def rmse_score(model, test):
    predictions = model.predict(test.user_ids, test.item_ids)

    return np.sqrt(((test.ratings - predictions) ** 2).mean())


def intra_distance_score(model, train, user_ids, k=10):
"""
Compute IntraDistance@k diversity of a set of recommended items which is defined
as the average pairwise distance of the items in the set.

In early definitions, it's called average dissimilarity [2]
It's best known as average intra-list distance [1]

.. [1] Castells, P., Hurley, N.J. and Vargas, S., 2015.
Novelty and diversity in recommender systems.
In Recommender Systems Handbook (pp. 881-918). Springer, Boston, MA.

.. [2] Hurley, N. and Zhang, M., 2011.
Novelty and diversity in top-n recommendation--analysis and evaluation.
ACM Transactions on Internet Technology (TOIT), 10(4), p.14.

Distance between items i,j is calculated as;
1 - intersection(i,j) / length(i) * length(j)

Parameters
----------

model: fitted instance of a recommender model
The model to evaluate.
user_ids: List of user ids to be tested.
train: :class:`spotlight.interactions.Interactions`, optional
Train interactions. If supplied, scores of known
interactions will not affect the computed metrics.
k: int or array of int,
The maximum number of predicted items
Returns
-------

(IntraDistance@k): numpy array of shape (len(users), len(k * (k-1) / 2)
A list of distances between each item in recommendation
list with length k for each test user.
"""

    distances = []

    # Item-user matrix in CSR format, so that each row's ``.indices``
    # holds the ids of the users who interacted with that item.
    train = train.tocsr()
    train_t = train.T.tocsr()
    lengths = train_t.getnnz(axis=1)

    for user_id in user_ids:
        distance = []

        predictions = -model.predict(user_id)
        # FLOAT_MAX is defined at the top of this module; pushing known
        # interactions to the end of the ranking keeps them out of the
        # recommendation list.
        predictions[train[user_id].indices] = FLOAT_MAX

        rec_list = predictions.argsort()[:k]

        for i, first_item in enumerate(rec_list):
            for second_item in rec_list[(i + 1):]:
                distance.append(
                    _get_distance(train_t, lengths, first_item, second_item))

        distances.append(distance)

    return np.array(distances)


def _get_distance(mat, lengths, first_item, second_item):
    # Number of users shared by the two items.
    numerator = np.in1d(mat[first_item].indices,
                        mat[second_item].indices,
                        assume_unique=True).sum()
    denominator = lengths[first_item] * lengths[second_item]
    similarity = numerator / denominator

    return 1 - similarity
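Not part of the diff: a minimal worked example of the pairwise distance above, on a made-up toy item-user matrix, to show the arithmetic.

import numpy as np
from scipy.sparse import csr_matrix

# Toy item-user matrix: item 0 was rated by users {0, 1},
# item 1 by users {1, 2, 3}.
mat = csr_matrix(np.array([[1, 1, 0, 0],
                           [0, 1, 1, 1]]))
lengths = mat.getnnz(axis=1)  # array([2, 3])

# intersection(0, 1) = |{1}| = 1, so the distance is
# 1 - 1 / (2 * 3) = 0.8333...
numerator = np.in1d(mat[0].indices, mat[1].indices, assume_unique=True).sum()
print(1 - numerator / (lengths[0] * lengths[1]))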
16 changes: 15 additions & 1 deletion tests/test_evaluation_metrics.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,8 @@

import pytest

from spotlight.evaluation import precision_recall_score, sequence_precision_recall_score
from spotlight.evaluation import (precision_recall_score,
                                  sequence_precision_recall_score,
                                  intra_distance_score)
from spotlight.cross_validation import random_train_test_split, user_based_train_test_split
from spotlight.datasets import movielens
from spotlight.factorization.implicit import ImplicitFactorizationModel
@@ -111,3 +112,16 @@ def test_precision_recall(data_implicit_factorization, k):
        assert len(precision.shape) == 1
    else:
        assert precision.shape[1] == len(k)


def test_intra_distance(data_implicit_factorization):

    (train, test, model) = data_implicit_factorization

    k = 5
    user_ids = list(set(test.user_ids))
    distances = intra_distance_score(model, train, user_ids, k=k)

    assert len(distances) == len(user_ids)
    for distance in distances:
        assert len(distance) == k * (k - 1) / 2 or len(distance) == 0
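For completeness, a hypothetical usage sketch outside the test suite (not part of this diff; the '100K' MovieLens variant and the untuned one-epoch model are assumptions made purely for illustration):

import numpy as np

from spotlight.cross_validation import random_train_test_split
from spotlight.datasets import movielens
from spotlight.evaluation import intra_distance_score
from spotlight.factorization.implicit import ImplicitFactorizationModel

interactions = movielens.get_movielens_dataset('100K')
train, test = random_train_test_split(interactions)

model = ImplicitFactorizationModel(n_iter=1)
model.fit(train)

# Average the pairwise distances per user, then across users,
# to obtain a single diversity figure for the model.
distances = intra_distance_score(model, train, list(set(test.user_ids)), k=10)
print(np.mean([np.mean(d) for d in distances if len(d)]))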