import abc
import audiomate
from audiomate import annotations
from jinja2 import Environment, PackageLoader, select_autoescape
from . import outcome
env = Environment(
loader=PackageLoader('evalmate.evaluator', 'report_templates'),
autoescape=select_autoescape(['html', 'xml'])
)
class Evaluation(abc.ABC):
"""
Base class for evaluation results.
Attributes:
ref_outcome (Outcome): The outcome of the ground-truth/reference.
hyp_outcome (Outcome): The outcome of the system-output/hypothesis.
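
    Example:
        A minimal sketch of a concrete subclass (``DummyEvaluation`` is a
        hypothetical name, not part of evalmate)::

            class DummyEvaluation(Evaluation):

                @property
                def template_data(self):
                    # Values handed to the Jinja2 report template.
                    return {
                        'ref': self.ref_outcome,
                        'hyp': self.hyp_outcome,
                    }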
"""
def __init__(self, ref_outcome, hyp_outcome):
self.ref_outcome = ref_outcome
self.hyp_outcome = hyp_outcome
@property
@abc.abstractmethod
def template_data(self):
""" Return a dictionary that contains objects/values to use in the rendering template. """
return {}
    @property
    def default_template(self):
        """ Name of the template that is used when no template is given explicitly. """
        return 'default'
    def write_report(self, path, template=None):
"""
Write the report to the given path.
Args:
path (str): Path to write the report to.
            template (str): Name of the Jinja2 template to use. If None, ``default_template`` is used.
All available templates are in the ``report_templates`` folder.
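
        Example:
            Illustrative sketch, assuming ``evaluation`` is an ``Evaluation``
            instance obtained from an evaluator::

                evaluation.write_report('/tmp/eval_report.txt')
                evaluation.write_report('/tmp/eval_report.txt', template='default')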
"""
with open(path, 'w') as f:
f.write(self.get_report(template=template))
    def get_report(self, template=None):
"""
Generate and return a report.
Args:
            template (str): Name of the Jinja2 template to use. If None, ``default_template`` is used.
All available templates are in the ``report_templates`` folder.
Returns:
str: The rendered report.
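
        Example:
            Illustrative sketch, assuming ``evaluation`` is an ``Evaluation``
            instance obtained from an evaluator::

                report = evaluation.get_report(template='default')
                print(report)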
"""
if template is None:
template = self.default_template
template = self._load_template(template)
return template.render(**self.template_data)
def _load_template(self, name):
return env.get_template('{}.txt'.format(name))
class Evaluator(abc.ABC):
"""
    Base class for an evaluator.
Provides methods for reading outcomes in different ways.
The evaluator for a specific class then has to implement ``do_evaluate``,
which performs the evaluation on ref and hyp outcome.
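
    Example:
        A minimal sketch of a concrete evaluator (``DummyEvaluator`` and
        ``DummyEvaluation`` are hypothetical names, not part of evalmate)::

            class DummyEvaluator(Evaluator):

                @classmethod
                def default_label_list_idx(cls):
                    # Label-list idx that is read from a corpus by default.
                    return 'word-transcript'

                def do_evaluate(self, ref, hyp):
                    # Compare the ref and hyp outcomes and wrap the result.
                    return DummyEvaluation(ref, hyp)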
"""
DEFAULT_UTT_IDX = 'noname'
    @classmethod
@abc.abstractmethod
def default_label_list_idx(cls):
""" Define the default label-lists which is used when reading a corpus. """
return 'default'
    @abc.abstractmethod
def do_evaluate(self, ref, hyp):
"""
Create the evaluation result of the given hypothesis compared to the given reference (ground truth).
Arguments:
ref (Outcome): The ground-truth/reference outcome.
hyp (Outcome): The system-output/hypothesis outcome.
Returns:
Evaluation: The evaluation results.
"""
pass
    def evaluate(self, ref, hyp, label_list_idx=None):
"""
Create the evaluation result of the given hypothesis compared to the given reference (ground truth).
        There are different possibilities for the input:
* ref = Outcome / hyp = Outcome: Both ref and hyp are `Outcome` instances.
See ``do_evaluate``
* ref = Corpus / hyp = dict: The dict contains label-lists which are compared against the corpus.
See ``evaluate_label_lists_against_corpus``
* ref = LabelList / hyp = LabelList: Ref label-list is compared against the other.
See ``evaluate_label_lists``
Arguments:
            ref (Outcome, LabelList, Corpus): The reference as an outcome, a label-list or a corpus.
            hyp (Outcome, LabelList, dict): The hypothesis as an outcome, a label-list or a dict of label-lists.
            label_list_idx (str): The idx of the label-lists to use when reading from a corpus.
Returns:
Evaluation: The evaluation results.
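
        Example:
            Illustrative sketch, assuming ``evaluator`` is an instance of a
            concrete ``Evaluator`` subclass::

                from audiomate import annotations

                ref = annotations.LabelList(labels=[annotations.Label('speech', 0.0, 4.5)])
                hyp = annotations.LabelList(labels=[annotations.Label('speech', 0.2, 4.8)])

                # Both arguments are label-lists, so evaluate_label_lists is used.
                result = evaluator.evaluate(ref, hyp)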
"""
if isinstance(ref, outcome.Outcome) and isinstance(hyp, outcome.Outcome):
return self.do_evaluate(ref, hyp)
if isinstance(ref, annotations.LabelList) and isinstance(hyp, annotations.LabelList):
return self.evaluate_label_lists(ref, hyp)
if isinstance(ref, audiomate.Corpus) and isinstance(hyp, dict):
return self.evaluate_label_lists_against_corpus(ref, hyp, label_list_idx=label_list_idx)
raise ValueError('Invalid arguments!')
    def evaluate_label_lists(self, ll_ref, ll_hyp, duration=None):
"""
        Create the evaluation result for a reference and a hypothesis label-list.
        If no duration is provided, some metrics cannot be computed.
        Arguments:
            ll_ref (LabelList): The reference label-list.
            ll_hyp (LabelList): The hypothesis label-list.
            duration (float): The duration of the utterance the label-lists belong to.
Returns:
Evaluation: The evaluation results.
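
        Example:
            Illustrative sketch, assuming ``evaluator`` is an instance of a
            concrete ``Evaluator`` subclass::

                from audiomate import annotations

                ll_ref = annotations.LabelList(labels=[annotations.Label('speech', 0.0, 4.5)])
                ll_hyp = annotations.LabelList(labels=[annotations.Label('speech', 0.2, 4.8)])

                # Passing the duration allows duration-based metrics to be computed.
                result = evaluator.evaluate_label_lists(ll_ref, ll_hyp, duration=10.0)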
"""
durations = None
if duration is not None:
durations = {self.DEFAULT_UTT_IDX: duration}
ref_outcome = outcome.Outcome(label_lists={self.DEFAULT_UTT_IDX: ll_ref}, utterance_durations=durations)
hyp_outcome = outcome.Outcome(label_lists={self.DEFAULT_UTT_IDX: ll_hyp}, utterance_durations=durations)
return self.evaluate(ref_outcome, hyp_outcome)
    def evaluate_label_lists_against_corpus(self, corpus, label_lists, label_list_idx=None):
"""
Create Evaluation for the given corpus.
Arguments:
corpus (Corpus): A corpus containing the reference label-lists.
label_lists (Dict): A dictionary containing label-lists with the utterance-idx as key.
The utterance-idx is used to find the corresponding reference label-list in the corpus.
label_list_idx (str): The idx of the label-lists to use as reference from the corpus.
                If None, ``default_label_list_idx()`` is used.
Returns:
Evaluation: The evaluation results.
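
        Example:
            Illustrative sketch; the corpus path, ``evaluator`` and the
            hypothesis label-lists are assumptions::

                import audiomate

                corpus = audiomate.Corpus.load('/path/to/corpus')

                # Hypothesis label-lists (annotations.LabelList instances),
                # keyed by utterance-idx.
                hyps = {
                    'utt-1': hyp_label_list_1,
                    'utt-2': hyp_label_list_2,
                }

                result = evaluator.evaluate_label_lists_against_corpus(corpus, hyps)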
"""
label_list_idx = label_list_idx or self.default_label_list_idx()
ref_outcome = outcome.Outcome()
hyp_outcome = outcome.Outcome()
for utterance in corpus.utterances.values():
ll_ref = utterance.label_lists[label_list_idx]
if utterance.idx not in label_lists:
raise ValueError('There is no hypothesis label-list with idx {}'.format(utterance.idx))
ll_hyp = label_lists[utterance.idx]
ref_outcome.label_lists[utterance.idx] = ll_ref
hyp_outcome.label_lists[utterance.idx] = ll_hyp
ref_outcome.utterance_durations[utterance.idx] = utterance.duration
hyp_outcome.utterance_durations[utterance.idx] = utterance.duration
return self.evaluate(ref_outcome, hyp_outcome)