Source code for rankeval.metrics.map

# Copyright (c) 2017, All Contributors (see CONTRIBUTORS file)
# Authors: Cristina Muntean <cristina.muntean@isti.cnr.it>
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import numpy as np
from rankeval.metrics.metric import Metric


class MAP(Metric):
    """
    This class implements MAP with several parameters.

    We implemented MAP as in https://www.kaggle.com/wiki/MeanAveragePrecision,
    adapted from:
    http://en.wikipedia.org/wiki/Information_retrieval
    http://sas.uwaterloo.ca/stats_navigation/techreports/04WorkingPapers/2004-09.pdf
    """

    def __init__(self, name='MAP', cutoff=None):
        """
        This is the constructor of MAP, an object of type Metric, with the
        name MAP. The constructor also allows setting custom values for the
        following parameters.

        Parameters
        ----------
        name : string
            The name of the metric (default: MAP).
        cutoff : int
            The top k results to be considered at the per-query level
            (e.g., 10). The default value is None, in which case the metric
            is computed on all the instances of a query.
        """
        super(MAP, self).__init__(name)
        self.cutoff = cutoff
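    # A minimal usage sketch (not part of the original module; it assumes
    # MAP is exported by the rankeval.metrics package): the cutoff decides
    # whether the metric behaves as MAP or as MAP@k.
    #
    #   >>> from rankeval.metrics import MAP
    #   >>> map_full = MAP()          # MAP over the whole ranked list
    #   >>> map_10 = MAP(cutoff=10)   # MAP@10, top-10 results per query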
    def eval(self, dataset, y_pred):
        """
        This method computes AP@k for each query and calculates their
        average, i.e., MAP@k.

        Parameters
        ----------
        dataset : Dataset
            Represents the Dataset object on which to apply MAP.
        y_pred : numpy 1d array of float
            Represents the predicted document scores for each instance in
            the dataset.

        Returns
        -------
        avg_score : float
            The overall MAP@k score, averaged over the per-query AP@k
            scores.
        detailed_scores : numpy 1d array of floats
            The detailed AP@k scores for each query, an array with length
            equal to the number of queries.
        """
        return super(MAP, self).eval(dataset, y_pred)
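    # A hedged sketch of dataset-level usage. The file path, the SVMLight
    # loader call and the scoring step are assumptions for illustration,
    # not part of this module:
    #
    #   >>> from rankeval.dataset import Dataset
    #   >>> test = Dataset.load("test.txt", format="svmlight")  # hypothetical path
    #   >>> y_pred = model.score(test, detailed=False)          # hypothetical model
    #   >>> avg_score, detailed_scores = MAP(cutoff=10).eval(test, y_pred)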
    def eval_per_query(self, y, y_pred):
        """
        This method computes AP@k at the per-query level (on the instances
        belonging to a specific query). The AP@k for a query is calculated
        as

            ap@k = sum( P(k) / min(m, n) ), for k = 1, ..., n

        where:
        - P(k) is the precision at cut-off k in the ranked list; it
          contributes to the sum only when the k-th ranked document is
          relevant (it counts as 0 otherwise);
        - m is the number of relevant documents;
        - n is the number of retrieved documents.

        If the denominator min(m, n) is zero, the score is set to zero.

        Parameters
        ----------
        y : numpy array
            Represents the labels of the instances corresponding to one
            query in the dataset (ground truth).
        y_pred : numpy array
            Represents the predicted document scores obtained during the
            model scoring phase for that query.

        Returns
        -------
        ap : float
            The AP@k score for the query.
        """
        idx_y_pred_sorted = np.argsort(y_pred)[::-1]
        if self.cutoff is not None:
            idx_y_pred_sorted = idx_y_pred_sorted[:self.cutoff]

        n_retrieved = len(idx_y_pred_sorted)
        precision_at_i = 0.
        n_relevant_retrieved_at_i = 0.
        for i in range(n_retrieved):
            # P(i+1) contributes to the sum only when the document ranked
            # at position i+1 is relevant (label != 0).
            if y[idx_y_pred_sorted[i]] != 0:
                n_relevant_retrieved_at_i += 1
                precision_at_i += float(n_relevant_retrieved_at_i) / (i + 1)

        # Normalize by min(m, n) as in the definition above, guarding
        # against queries with no relevant or no retrieved documents.
        n_relevant = np.count_nonzero(y)
        denominator = min(n_relevant, n_retrieved)
        if denominator == 0:
            return 0.
        return precision_at_i / denominator
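    # A worked example (illustrative only): two relevant documents ranked
    # at positions 2 and 3 out of four retrieved.
    #
    #   >>> import numpy as np
    #   >>> ap = MAP().eval_per_query(np.array([0, 2, 1, 0]),
    #   ...                           np.array([0.4, 0.3, 0.2, 0.1]))
    #
    # Ranking by descending score gives the label sequence (0, 2, 1, 0),
    # so only P(2) = 1/2 and P(3) = 2/3 contribute; with m = 2 and n = 4
    # the score is (1/2 + 2/3) / min(2, 4) = 7/12, roughly 0.583.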
    def __str__(self):
        s = self.name
        if self.cutoff is not None:
            s += "@{}".format(self.cutoff)
        return s