# Source code for rankeval.metrics.kendall_tau

# Copyright (c) 2017, All Contributors (see CONTRIBUTORS file)
# Authors: Cristina Muntean <cristina.muntean@isti.cnr.it>
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import scipy.stats as stats
from rankeval.metrics.metric import Metric


class Kendalltau(Metric):
    """
    This class implements Kendall's Tau.
    We use the Kendall tau coefficient implementation from scipy
    (``scipy.stats.kendalltau``).

    """

    def __init__(self, name='K'):
        """
        This is the constructor of Kendall Tau, an object of type Metric,
        with the name K.

        Parameters
        ----------
        name: string
            Name given to the metric; it is forwarded to the Metric
            constructor. Defaults to 'K'.

        """
        super(Kendalltau, self).__init__(name)

    def eval(self, dataset, y_pred):
        """
        This method computes the Kendall tau score over the entire dataset and
        the detailed scores per query. The computation is delegated unchanged
        to the parent Metric.eval, which is expected to call eval_per_query
        for each query in order to get the detailed Kendall tau score.

        Parameters
        ----------
        dataset : Dataset
            Represents the Dataset object on which to apply Kendall Tau.
        y_pred : numpy 1d array of float
            Represents the predicted document scores for each instance
            in the dataset.

        Returns
        -------
        avg_score: float
            The overall Kendall tau score (averages over the detailed scores).
        detailed_scores: numpy 1d array of floats
            The detailed Kendall tau scores for each query, an array with length
            of the number of queries.
        """
        return super(Kendalltau, self).eval(dataset, y_pred)

    def eval_per_query(self, y, y_pred):
        """
        This method computes Kendall tau at per query level (on the instances
        belonging to a specific query). The Kendall tau per query is
        calculated as:

            tau = (P - Q) / sqrt((P + Q + T) * (P + Q + U))

        where P is the number of concordant pairs, Q the number of discordant
        pairs, T the number of ties only in x, and U the number of ties only
        in y. If a tie occurs for the same pair in both x and y, it is not
        added to either T or U. Here x corresponds to the labels ``y`` and
        y to the predicted scores ``y_pred``.

        Parameters
        ----------
        y: numpy array
            Represents the labels of instances corresponding to one query in
            the dataset (ground truth).
        y_pred: numpy array.
            Represents the predicted document scores obtained during the model
            scoring phase for that query.

        Returns
        -------
        kendalltau: float
            The Kendall tau per query, exactly as returned by scipy.
            NOTE(review): scipy is expected to yield nan for degenerate
            inputs (e.g. all labels equal) -- confirm how the caller
            averages such values.
        """
        # `initial_lexsort` is intentionally not passed: scipy ignores it,
        # deprecated it in 1.10 and removed it in 1.14, where supplying it
        # raises TypeError. Its former default was True, so older releases
        # behave the same without it.
        # The result is unpacked as a (statistic, pvalue) pair, which works
        # for every scipy result type, rather than relying on the legacy
        # `.correlation` attribute alias.
        tau, _p_value = stats.kendalltau(y, y_pred)
        return tau

    def __str__(self):
        """Return the metric name as its string representation."""
        return self.name