# Source code for evalmate.evaluator.outcome

import numpy as np


class Outcome:
    """
    An outcome represents the annotation/labels/transcriptions of a dataset/corpus for a given task.
    This can be either the ground truth/reference or the system output/hypothesis.

    If no durations are provided or the durations of some utterances are missing,
    some methods may not work or throw exceptions.

    Arguments:
        label_lists (dict): Label-lists keyed by utterance-idx/sample-idx.
                            If ``None``, an empty dictionary is created.
        utterance_durations (dict): Durations keyed by utterance-idx.
                                    If ``None``, an empty dictionary is created.

    Attributes:
        label_lists (dict): Dictionary containing all label-lists with the utterance-idx/sample-idx as key.
        utterance_durations (dict): Dictionary (utterance-idx/duration) containing the durations of all utterances.
    """

    def __init__(self, label_lists=None, utterance_durations=None):
        # Only substitute a fresh dict for ``None``. A plain ``or {}`` would also
        # discard an empty dict handed in by the caller, so entries the caller
        # adds to it afterwards would never show up in the outcome.
        self.label_lists = {} if label_lists is None else label_lists
        self.utterance_durations = {} if utterance_durations is None else utterance_durations

    def label_set(self):
        """
        Return a label-set containing all labels.

        Returns:
            LabelSet: Label-set with the labels of every label-list of the outcome.
        """
        ls = LabelSet()

        for label_list in self.label_lists.values():
            ls.labels.extend(label_list.labels)

        return ls

    def label_set_for_value(self, value):
        """
        Return a label-set containing all labels, where the value is `value`.

        Arguments:
            value (str): The value to filter.

        Returns:
            LabelSet: Label-set containing all labels with the given value.
        """
        ls = LabelSet()

        for label_list in self.label_lists.values():
            # The label-list is iterated directly to obtain its labels.
            ls.labels.extend(label for label in label_list if label.value == value)

        return ls

    @property
    def total_duration(self):
        """
        Return the duration of all utterances together.

        Notes:
            Only works if for all utterances, the durations are provided.
            The result is the sum over every entry of ``utterance_durations``.

        Raises:
            ValueError: If a key of ``label_lists`` has no entry in ``utterance_durations``.
        """
        missing = set(self.label_lists).difference(self.utterance_durations)

        if missing:
            raise ValueError('Missing durations for some utterances!')

        return sum(self.utterance_durations.values())

    @property
    def all_values(self):
        """
        Return a set of all values, occurring in the outcome.

        The values are collected by calling ``label_values()`` on every label-list.
        """
        values = set()

        for ll in self.label_lists.values():
            values.update(ll.label_values())

        return values


class LabelSet:
    """
    Class to collect a bunch of labels.
    This is used to compute statistics over a defined set of labels.

    For example we want to compute the average length of all labels with the value 'music'.
    We can then collect all these in a label-set and perform the computation.

    Arguments:
        labels (list): Labels to start with. Every label has to provide a ``duration`` attribute
                       for the length statistics. If ``None``, an empty list is created.

    Attributes:
        labels (list): The labels contained in the set.
    """

    def __init__(self, labels=None):
        # Only substitute a fresh list for ``None``. A plain ``or []`` would also
        # discard an empty list handed in by the caller, breaking the link to
        # labels the caller appends to it afterwards.
        self.labels = [] if labels is None else labels

    @property
    def count(self):
        """ Return the number of labels. """
        return len(self.labels)

    @property
    def length_min(self):
        """
        Return the length of the shortest label.

        Raises:
            ValueError: Raised by numpy if the set contains no labels.
        """
        return np.min(self.label_lengths)

    @property
    def length_max(self):
        """
        Return the length of the longest label.

        Raises:
            ValueError: Raised by numpy if the set contains no labels.
        """
        return np.max(self.label_lengths)

    @property
    def length_mean(self):
        """
        Return the mean length of all labels.

        Notes:
            For an empty set numpy returns ``nan`` and emits a RuntimeWarning.
        """
        return np.mean(self.label_lengths)

    @property
    def length_median(self):
        """
        Return the median of all label lengths.

        Notes:
            For an empty set numpy returns ``nan`` and emits a RuntimeWarning.
        """
        return np.median(self.label_lengths)

    @property
    def length_variance(self):
        """
        Return the variance of all label lengths.

        Notes:
            Computed with ``np.var`` and its default arguments.
            For an empty set numpy returns ``nan`` and emits a RuntimeWarning.
        """
        return np.var(self.label_lengths)

    @property
    def label_lengths(self):
        """ Return a list containing all label lengths (the ``duration`` of every label). """
        return [label.duration for label in self.labels]