Source code for evalmate.alignment.time

import numpy as np
import scipy

from evalmate.utils import label

from . import utils
from . import aligner
from . import candidates


class BipartiteMatchingAligner(aligner.EventAligner):
    """
    Create event-based alignment, based on bipartite matching.

    1. In a first step for every possible label-pair between ref and hyp,
       it is decided if a mapping of such a pair is possible.
       For this a CandidateFinder is used.
    2. Using penalty and weight parameters, for every pair a penalty is
       computed for aligning the pair.
    3. From all the pairs and the computed penalties, the best alignment
       is computed using bipartite matching, so that every label only
       occurs once in the final alignment.

    Arguments:
        candidate_finder (CandidateFinder): CandidateFinder to use for
            finding potential labels for alignment. If ``None``, a
            ``candidates.OverlapCandidateFinder`` with default settings
            is created.
        non_overlap_penalty_weight (float): Weight-factor of penalty for
            the non-overlapping ratio between two labels.
        substitution_penalty (float): Penalty for aligning two labels
            with different values.
        insertion_penalty (float): Penalty for aligning a
            hypothesis-label with no reference-label.
        deletion_penalty (float): Penalty for aligning a
            reference-label with no hypothesis-label.
    """

    def __init__(self, candidate_finder=None, non_overlap_penalty_weight=1,
                 substitution_penalty=2, insertion_penalty=10,
                 deletion_penalty=10):
        if candidate_finder is None:
            self.candidate_finder = candidates.OverlapCandidateFinder()
        else:
            self.candidate_finder = candidate_finder

        self.non_overlap_penalty_weight = non_overlap_penalty_weight
        self.substitution_penalty = substitution_penalty
        self.insertion_penalty = insertion_penalty
        self.deletion_penalty = deletion_penalty

    def align(self, ref_labels, hyp_labels):
        """
        Return an alignment between the events of the given label-lists.

        Args:
            ref_labels (list): The list containing labels of the
                ground truth.
            hyp_labels (list): The list containing labels of the
                system output.

        Returns:
            list: A list of :class:`evalmate.alignment.LabelPair`.
            Every pair contains one label (event) from the ground truth
            and one from the system output, that are aligned.
            One of them also can be ``None``.
        """
        # Imported explicitly: a bare ``import scipy`` does not guarantee
        # that the ``scipy.optimize`` submodule has been loaded.
        from scipy.optimize import linear_sum_assignment

        num_ref = len(ref_labels)
        num_hyp = len(hyp_labels)

        # Trivial cases: nothing to match against on one or both sides.
        if num_ref == 0 and num_hyp == 0:
            return []

        if num_ref == 0:
            return [utils.LabelPair(None, x) for x in hyp_labels]

        if num_hyp == 0:
            return [utils.LabelPair(x, None) for x in ref_labels]

        # Only the candidate pairs are needed; the unmatched index lists
        # returned by the finder are not used here.
        close_pairs, _, _ = self.candidate_finder.find(ref_labels, hyp_labels)

        # Penalty assigned to every ref/hyp pair that the candidate finder
        # did not propose.
        invalid_penalty = (
            self.non_overlap_penalty_weight
            + self.insertion_penalty
            + self.deletion_penalty
        )

        # Square cost matrix: real labels plus one "no partner" slot per
        # label of the other side, so that every label can stay unaligned.
        # ``dtype=float`` replaces the removed ``np.float`` alias.
        size = num_ref + num_hyp
        cost = np.full((size, size), invalid_penalty, dtype=float)

        # Rows past the reference labels stand for "no reference"
        # (insertion); columns past the hypothesis labels stand for
        # "no hypothesis" (deletion). The column assignment comes last, so
        # the bottom-right corner holds the deletion penalty.
        cost[num_ref:, :] = self.insertion_penalty
        cost[:, num_hyp:] = self.deletion_penalty

        for ref_index, hyp_index in close_pairs:
            ref = ref_labels[ref_index]
            hyp = hyp_labels[hyp_index]

            penalty = 0

            if ref.value != hyp.value:
                penalty += self.substitution_penalty

            # NOTE(review): assumes label.overlap_percentage returns a
            # ratio in [0, 1] - confirm against evalmate.utils.label.
            penalty += self.non_overlap_penalty_weight * (
                1 - label.overlap_percentage(ref, hyp)
            )

            cost[ref_index, hyp_index] = penalty

        row_ind, col_ind = linear_sum_assignment(cost)

        matching = []

        for ref_ind, hyp_ind in zip(row_ind, col_ind):
            has_ref = ref_ind < num_ref
            has_hyp = hyp_ind < num_hyp

            # Padding slot matched with padding slot: carries no label.
            if not has_ref and not has_hyp:
                continue

            ref_label = ref_labels[ref_ind] if has_ref else None
            hyp_label = hyp_labels[hyp_ind] if has_hyp else None
            matching.append(utils.LabelPair(ref_label, hyp_label))

        return matching
class FullMatchingAligner(aligner.EventAligner):
    """
    Event-based alignment that returns every possible match.

    A single label may therefore appear in several pairs, each time with
    a different counterpart.

    Arguments:
        min_overlap (float): Number of seconds the segment of overlap has
            to be, to align two labels. If ``0``, any overlap is accepted.
    """

    def __init__(self, min_overlap=0):
        self.min_overlap = min_overlap
        self.finder = candidates.OverlapCandidateFinder(min_overlap=min_overlap)

    def align(self, ref_labels, hyp_labels):
        """
        Return an alignment between the labels of the two label-lists.

        Args:
            ref_labels (list): The list containing labels of the
                ground truth.
            hyp_labels (list): The list containing labels of the
                system output.

        Returns:
            list: A list of :class:`evalmate.alignment.LabelPair`.
            Every pair contains one label (event) from the ground truth
            and one from the system output, that are aligned.
            One of them also can be ``None``.
        """
        matched, unmatched_refs, unmatched_hyps = self.finder.find(
            ref_labels, hyp_labels
        )

        # Order of the result: matched pairs first, then references without
        # a partner, then hypotheses without a partner.
        result = [
            utils.LabelPair(ref_labels[r], hyp_labels[h])
            for r, h in matched
        ]
        result.extend(utils.LabelPair(ref_labels[r], None) for r in unmatched_refs)
        result.extend(utils.LabelPair(None, hyp_labels[h]) for h in unmatched_hyps)

        return result