Source code for rankeval.metrics.mse

# Copyright (c) 2017, All Contributors (see CONTRIBUTORS file)
# Authors: Cristina Muntean <cristina.muntean@isti.cnr.it>
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import numpy as np

from rankeval.metrics import Metric


class MSE(Metric):
    """
    This class implements the Mean Squared Error (MSE) metric.
    """

    def __init__(self, name='MSE', cutoff=None):
        """
        This is the constructor of MSE, an object of type Metric, with the
        name MSE. The constructor also allows setting custom values for the
        following parameters.

        Parameters
        ----------
        name : string
            MSE
        cutoff : int
            The top k results to be considered at the per-query level
            (e.g. 10). The default value is None, in which case the metric
            is computed on all the instances of a query.
        """
        super(self.__class__, self).__init__(name)
        self.cutoff = cutoff
    def eval(self, dataset, y_pred):
        """
        This method computes the overall MSE on the dataset by aggregating
        the per-query squared errors, each normalized by the total number
        of instances in the dataset.

        Parameters
        ----------
        dataset : Dataset
            Represents the Dataset object on which to apply MSE.
        y_pred : numpy 1d array of float
            Represents the predicted document scores for each instance
            in the dataset.

        Returns
        -------
        avg_score : float
            The overall MSE score (the sum of the detailed per-query
            scores).
        detailed_scores : numpy 1d array of floats
            The detailed MSE@k scores for each query, an array with length
            equal to the number of queries.
        """
        self.detailed_scores = np.zeros(dataset.n_queries, dtype=np.float32)
        for qid, q_y, q_y_pred in self.query_iterator(dataset, y_pred):
            self.detailed_scores[qid] = \
                self.eval_per_query(q_y, q_y_pred) / dataset.n_instances
        return self.detailed_scores.sum(), self.detailed_scores
    def eval_per_query(self, y, y_pred):
        """
        This method helps compute the MSE score per query. It is called by
        the eval function, which normalizes and aggregates the scores of
        each query.

        Parameters
        ----------
        y : numpy array
            Represents the labels of the instances corresponding to one
            query in the dataset (ground truth).
        y_pred : numpy array
            Represents the predicted document scores obtained during the
            model scoring phase for that query.

        Returns
        -------
        mse : float
            The sum of squared errors for one query (normalized into MSE
            by the eval method). If a cutoff is set, only the top-k
            documents by predicted score are considered.
        """
        if self.cutoff is not None:
            idx = np.argsort(y_pred)[::-1][:self.cutoff]
            return ((y[idx] - y_pred[idx]) ** 2).sum()
        else:
            return ((y - y_pred) ** 2.0).sum()
    def __str__(self):
        s = self.name
        if self.cutoff is not None:
            s += "@{}".format(self.cutoff)
        return s
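
The following is a minimal sketch of how the class above behaves on a single
query. The label and score arrays are made-up illustration data, not part of
the library; for whole-dataset evaluation one would instead pass a Dataset
object and a prediction array to eval, as described in its docstring.

    import numpy as np
    from rankeval.metrics import MSE

    # Hypothetical per-query data: graded relevance labels and model scores.
    y = np.array([2.0, 0.0, 1.0, 0.0])
    y_pred = np.array([1.5, 0.5, 1.0, 0.0])

    mse = MSE()               # computed on all instances of the query
    mse_at_2 = MSE(cutoff=2)  # restricted to the top-2 documents by score

    # eval_per_query returns the (possibly truncated) sum of squared errors;
    # eval later divides these sums by the total number of dataset instances.
    print(mse.eval_per_query(y, y_pred))       # 0.25 + 0.25 + 0 + 0 = 0.5
    print(mse_at_2.eval_per_query(y, y_pred))  # top-2 by score: 0.25 + 0.0
    print(str(mse_at_2))                       # "MSE@2"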