# Source code for rankeval.scoring.scorer

# Copyright (c) 2017, All Contributors (see CONTRIBUTORS file)
# Authors: Salvatore Trani <salvatore.trani@isti.cnr.it>
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Class for efficient scoring of an ensemble-based model composed of binary regression trees on a given dataset.
"""

from ..dataset import Dataset
from _efficient_scoring import basic_scoring, detailed_scoring


class Scorer(object):
    """
    Class for efficient scoring of an ensemble-based model composed of binary
    regression trees on a given dataset.

    This class can be used for simple or detailed scoring, depending on the
    mode selected at scoring time. The document scores are cached to avoid
    useless re-scoring: calling the `score` method multiple times does not
    execute the scoring activity again, except for a detailed scoring that
    follows a basic scoring. In that situation the scoring has to be repeated
    in order to collect the per-tree scores.

    Parameters
    ----------
    model : RTEnsemble
        The model to use for scoring
    dataset : Dataset
        The dataset to use for scoring

    Attributes
    ----------
    model : RTEnsemble
        The model to use for scoring
    dataset : Dataset
        The dataset to use for scoring
    y_pred : numpy array of float
        The predicted scores produced by the given model for each sample of
        the given dataset X (None until a scoring has been performed)
    partial_y_pred : numpy 2d-array of float
        The predicted score of each tree of the model for each dataset
        instance (None until a detailed scoring has been performed)
    """

    def __init__(self, model, dataset):
        self.model = model
        self.dataset = dataset

        # Cache of the predicted scores for each dataset instance
        self.y_pred = None

        # Cache of the partial scores of each tree for each dataset instance
        # (filled only by a detailed scoring)
        self.partial_y_pred = None

    def score(self, detailed=False):
        """
        Score the dataset with the model, reusing cached results if possible.

        Parameters
        ----------
        detailed : bool, optional (default False)
            True if the class has to perform a detailed scoring (which also
            stores the per-tree partial scores), False otherwise

        Returns
        -------
        y_pred : numpy array of float
            The predicted scores produced by the given model for each sample
            of the given dataset X

        Attributes
        ----------
        self.y_pred : numpy array of float
            The predicted scores of each dataset instance
        self.partial_y_pred : numpy 2d-array of float
            The per-tree scores (updated only when `detailed` is True)
        """
        # Skip the scoring if it has already been done (return cached
        # results). A basic request is satisfied by any previous scoring,
        # while a detailed request needs the partial scores to be available.
        if (not detailed and self.y_pred is not None) or \
                (detailed and self.partial_y_pred is not None):
            return self.y_pred

        if detailed:
            self.partial_y_pred = detailed_scoring(self.model, self.dataset.X)
            # The final score of an instance is the sum of its per-tree scores
            self.y_pred = self.partial_y_pred.sum(axis=1)
        else:
            self.y_pred = basic_scoring(self.model, self.dataset.X)

        return self.y_pred

    def get_predicted_scores(self):
        """
        Provide an accessor to the predicted scores produced by the given
        model for each sample of the given dataset X. A basic scoring is
        triggered if no score has been computed yet.

        Returns
        -------
        scores : numpy array of float
            The predicted scores produced by the given model for each sample
            of the given dataset X
        """
        if self.y_pred is None:
            self.score(detailed=False)
        return self.y_pred

    def get_partial_predicted_scores(self):
        """
        Provide an accessor to the partial scores produced by the given model
        for each sample of the given dataset X. A detailed scoring is
        triggered if the partial scores have not been computed yet.

        Each partial score reflects the score produced by a single tree of
        the ensemble model for a single dataset instance. Thus, the returned
        numpy matrix has a shape of (n_instances, n_trees).

        The partial scores are documented as not taking into account the tree
        weights, so that producing the final score requires multiplying each
        row by the tree weight vector.
        NOTE(review): `score` derives `y_pred` from a plain row-wise sum of
        these partial scores -- confirm whether `detailed_scoring` already
        applies the tree weights.

        Returns
        -------
        scores : numpy 2d-array of float
            The predicted score of each tree of the model for each dataset
            instance
        """
        if self.partial_y_pred is None:
            self.score(detailed=True)
        return self.partial_y_pred