# Source code for evalmate.alignment.time

import numpy as np
import scipy

from evalmate.utils import label

from . import utils
from . import aligner
from . import candidates


class BipartiteMatchingAligner(aligner.EventAligner):
    """
    Create event-based alignment, based on bipartite matching.

    1. In a first step for every possible label-pair between ref and hyp,
    it is decided if a mapping of such a pair is possible.
    For this a CandidateFinder is used.

    2. Using penalty and weight parameters,
    for every pair a penalty is computed for aligning the pair.

    3. From all the pairs and the computed penalties,
    the best (lowest total penalty) alignment is computed using
    bipartite matching, so that every label only occurs once in the
    final alignment.

    Arguments:
        candidate_finder (CandidateFinder): CandidateFinder to use for finding
                                            potential labels for alignment.
                                            If ``None``, an
                                            ``OverlapCandidateFinder`` with
                                            default settings is created.
        non_overlap_penalty_weight (float): Weight-factor of penalty for the
                                            non-overlapping ratio between two
                                            labels.
        substitution_penalty (float): Penalty for aligning two labels with
                                      different values.
        insertion_penalty (float): Penalty for aligning a hypothesis-label
                                   with no reference-label.
        deletion_penalty (float): Penalty for aligning a reference-label with
                                  no hypothesis-label.
    """

    def __init__(self, candidate_finder=None, non_overlap_penalty_weight=1,
                 substitution_penalty=2, insertion_penalty=10, deletion_penalty=10):

        if candidate_finder is None:
            candidate_finder = candidates.OverlapCandidateFinder()

        self.candidate_finder = candidate_finder
        self.non_overlap_penalty_weight = non_overlap_penalty_weight
        self.substitution_penalty = substitution_penalty
        self.insertion_penalty = insertion_penalty
        self.deletion_penalty = deletion_penalty

    def align(self, ref_labels, hyp_labels):
        """
        Return an alignment between the events of the given label-lists.

        Args:
            ref_labels (list): The list containing labels of the ground truth.
            hyp_labels (list): The list containing labels of the system output.

        Returns:
            list: A list of :class:`evalmate.alignment.LabelPair`.
            Every pair contains one label (event) from the ground truth and
            one from the system output, that are aligned.
            One of them also can be ``None``.
        """
        # Imported here because a bare ``import scipy`` does not guarantee
        # that the ``scipy.optimize`` submodule has been loaded.
        from scipy.optimize import linear_sum_assignment

        num_ref = len(ref_labels)
        num_hyp = len(hyp_labels)

        # With one side empty there is nothing to match: every label of the
        # other side is unaligned (this yields ``[]`` if both are empty).
        if num_ref == 0:
            return [utils.LabelPair(None, hyp) for hyp in hyp_labels]

        if num_hyp == 0:
            return [utils.LabelPair(ref, None) for ref in ref_labels]

        # Only the candidate pairs are needed; the unmatched-index lists that
        # the finder also returns are not used by this aligner.
        close_pairs, _, _ = self.candidate_finder.find(ref_labels, hyp_labels)

        # Pairs that are not candidates get a penalty higher than an
        # insertion plus a deletion.
        invalid_penalty = (self.non_overlap_penalty_weight
                           + self.insertion_penalty
                           + self.deletion_penalty)

        # Square cost matrix: the first ``num_ref`` rows / ``num_hyp`` columns
        # are the real labels, the remaining rows / columns are placeholders
        # that stand for "no reference" (insertion) / "no hypothesis"
        # (deletion).
        size = num_ref + num_hyp
        cost = np.full((size, size), invalid_penalty, dtype=float)
        cost[num_ref:, :] = self.insertion_penalty
        # NOTE: this assignment runs last, so the placeholder/placeholder
        # quadrant also holds the deletion penalty. Kept as in the original.
        cost[:, num_hyp:] = self.deletion_penalty

        for ref_ind, hyp_ind in close_pairs:
            ref = ref_labels[ref_ind]
            hyp = hyp_labels[hyp_ind]

            penalty = 0

            if ref.value != hyp.value:
                penalty += self.substitution_penalty

            # presumably ``overlap_percentage`` is a ratio in [0, 1], so this
            # term grows as the two labels overlap less -- confirm in
            # ``evalmate.utils.label``.
            penalty += self.non_overlap_penalty_weight * (1 - label.overlap_percentage(ref, hyp))

            cost[ref_ind, hyp_ind] = penalty

        row_ind, col_ind = linear_sum_assignment(cost)

        matching = []

        for ref_ind, hyp_ind in zip(row_ind, col_ind):
            has_ref = ref_ind < num_ref
            has_hyp = hyp_ind < num_hyp

            # Placeholder matched with placeholder carries no information.
            if not (has_ref or has_hyp):
                continue

            ref_label = ref_labels[ref_ind] if has_ref else None
            hyp_label = hyp_labels[hyp_ind] if has_hyp else None

            matching.append(utils.LabelPair(ref_label, hyp_label))

        return matching


class FullMatchingAligner(aligner.EventAligner):
    """
    Event-based alignment, where all possible matches are returned.
    So a single label can occur multiple times, but with a different counterpart.

    Arguments:
        min_overlap (float): Number of seconds the segment of overlap has to be, to align two labels.
                             If ``0``, any overlap is accepted.
    """

    def __init__(self, min_overlap=0):
        self.min_overlap = min_overlap
        # The finder decides which ref/hyp labels count as a match.
        self.finder = candidates.OverlapCandidateFinder(
            min_overlap=min_overlap
        )

    def align(self, ref_labels, hyp_labels):
        """
        Return an alignment between the labels of the two label-lists.

        Args:
            ref_labels (list): The list containing labels of the ground truth.
            hyp_labels (list): The list containing labels of the system output.

        Returns:
            list: A list of :class:`evalmate.alignment.LabelPair`.
            Every pair contains one label (event) from
            the ground truth and one from the system output,
            that are aligned. One of them also can be ``None``.
            Matched pairs come first, followed by unmatched reference labels
            and then unmatched hypothesis labels.
        """

        # The finder returns index pairs of matching labels plus the indices
        # of the reference / hypothesis labels that have no match.
        close_pairs, ref_no_match, hyp_no_match = self.finder.find(ref_labels, hyp_labels)

        matched = [
            utils.LabelPair(ref_labels[ref_index], hyp_labels[hyp_index])
            for ref_index, hyp_index in close_pairs
        ]
        ref_only = [
            utils.LabelPair(ref_labels[ref_index], None)
            for ref_index in ref_no_match
        ]
        hyp_only = [
            utils.LabelPair(None, hyp_labels[hyp_index])
            for hyp_index in hyp_no_match
        ]

        return matched + ref_only + hyp_only