Source code for decibel.evaluator.evaluator

import mir_eval
import pandas
import numpy as np
from os import path

import decibel.import_export.midi_alignment_score_io
from decibel.import_export import filehandler
import multiprocessing as mp


def _directional_hamming_distance(reference_intervals, estimated_intervals):
    """
    Compute the directional Hamming distance between the reference intervals and the estimated intervals.

    :param reference_intervals: Ground truth intervals
    :param estimated_intervals: Estimated intervals
    :return: Directional Hamming distance between the reference intervals and the estimated intervals
    """
    est_ts = np.unique(estimated_intervals.flatten())
    seg = 0.
    for start, end in reference_intervals:
        dur = end - start
        between_start_end = est_ts[(est_ts >= start) & (est_ts < end)]
        seg_ts = np.hstack([start, between_start_end, end])
        seg += dur - np.diff(seg_ts).max()
    return seg / (reference_intervals[-1, 1] - reference_intervals[0, 0])
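

# A minimal worked sketch of what _directional_hamming_distance computes; the
# interval arrays below are hand-made illustrations, not part of the data set.
# For each reference segment, the longest stretch not cut by an estimated
# boundary is kept; what is cut off, summed over segments and normalised by the
# total duration, is the directional Hamming distance.
def _example_directional_hamming_distance():
    ref = np.array([[0.0, 2.0], [2.0, 4.0]])  # two 2-second reference segments
    est = np.array([[0.0, 1.0], [1.0, 4.0]])  # estimated boundary at t=1 cuts the first segment
    # The first reference segment loses 1 of its 2 seconds, the second loses
    # nothing, so the distance is 1 / 4 = 0.25.
    assert abs(_directional_hamming_distance(ref, est) - 0.25) < 1e-9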


def evaluate(ground_truth_lab_path, my_lab_path):
    """
    Evaluate the chord label sequence in my_lab_path, compared to the ground truth sequence in ground_truth_lab_path

    :param ground_truth_lab_path: Path to .lab file of ground truth chord label sequence
    :param my_lab_path: Path to .lab file of estimated chord label sequence
    :return: CSR, over-segmentation, under-segmentation, segmentation
    """
    (ref_intervals, ref_labels) = mir_eval.io.load_labeled_intervals(ground_truth_lab_path)
    (est_intervals, est_labels) = mir_eval.io.load_labeled_intervals(my_lab_path)
    est_intervals, est_labels = mir_eval.util.adjust_intervals(
        est_intervals, est_labels, ref_intervals.min(), ref_intervals.max(),
        mir_eval.chord.NO_CHORD, mir_eval.chord.NO_CHORD)
    (intervals, ref_labels, est_labels) = mir_eval.util.merge_labeled_intervals(
        ref_intervals, ref_labels, est_intervals, est_labels)
    durations = mir_eval.util.intervals_to_durations(intervals)
    comparisons_maj_min = mir_eval.chord.majmin(ref_labels, est_labels)
    score_maj_min = mir_eval.chord.weighted_accuracy(comparisons_maj_min, durations)
    overseg = 1 - _directional_hamming_distance(ref_intervals, est_intervals)
    underseg = 1 - _directional_hamming_distance(est_intervals, ref_intervals)
    seg = min(overseg, underseg)
    return score_maj_min, overseg, underseg, seg


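# A hedged usage sketch for evaluate(); the two .lab paths are left as
# parameters because the actual file locations depend on the local data set
# layout.
def _example_evaluate(ground_truth_lab_path, my_lab_path):
    csr, overseg, underseg, seg = evaluate(ground_truth_lab_path, my_lab_path)
    print('CSR: {0:.3f}, OvS: {1:.3f}, UnS: {2:.3f}, Seg: {3:.3f}'.format(
        csr, overseg, underseg, seg))

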
def evaluate_method(all_songs, method_name, get_lab_function):
    """
    Evaluate all songs from our data set for one specific chord estimation technique, for which you get the labels
    using get_lab_function

    :param all_songs: All songs in our data set
    :param method_name: Name of the method (e.g. 'CHF_2017_DF_BEST')
    :param get_lab_function: A function that takes the song and outputs the lab path
    :return: Pandas DataFrame with results
    """
    result_dict = dict()
    for song_key in all_songs:
        song = all_songs[song_key]
        if song.full_ground_truth_chord_labs_path != '' and path.isfile(get_lab_function(song)):
            # This song has a ground truth and an estimation, so we can evaluate it
            result_dict[song_key] = list(evaluate(song.full_ground_truth_chord_labs_path, get_lab_function(song)))
    result_df = pandas.DataFrame.from_dict(result_dict, orient='index',
                                           columns=[method_name + '_' + m for m in ['CSR', 'OvS', 'UnS', 'Seg']])
    return result_df


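# A sketch of calling evaluate_method() directly for the Chordify baseline.
# The Song attribute full_chordify_chord_labs_path is used elsewhere in this
# module; the method name string is an illustrative choice.
def _example_evaluate_method(all_songs):
    result_df = evaluate_method(all_songs, 'CHF_2017',
                                lambda song: song.full_chordify_chord_labs_path)
    # One row per evaluated song, with columns CHF_2017_CSR, CHF_2017_OvS,
    # CHF_2017_UnS and CHF_2017_Seg.
    return result_df

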
def write_method_evaluations(all_songs, method_name, get_lab_function):
    """
    Write evaluations for all songs from our data set that have not been evaluated yet.

    :param all_songs: All songs in our data set
    :param method_name: Name of the method (e.g. 'CHF_2017_DF_BEST')
    :param get_lab_function: A function that takes the song and outputs the lab path
    """
    evaluation_path = filehandler.get_evaluation_table_path(method_name)
    if not path.isfile(evaluation_path):
        evaluation_df = evaluate_method(all_songs, method_name, get_lab_function)
        evaluation_df.to_csv(evaluation_path)


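# write_method_evaluations() only computes a table when its CSV is absent, so
# results are cached across runs. A sketch of forcing a re-run for one method
# by removing the cached table first; whether and when to do this is a
# workflow assumption, not part of this module.
def _example_force_reevaluation(all_songs, method_name, get_lab_function):
    import os  # local import keeps this illustrative helper self-contained
    evaluation_path = filehandler.get_evaluation_table_path(method_name)
    if path.isfile(evaluation_path):
        os.remove(evaluation_path)  # drop the cached table so it is rebuilt
    write_method_evaluations(all_songs, method_name, get_lab_function)

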
def _evaluate_audio_type(all_songs, df_combination_and_selection_types, audio_type):
    """
    Evaluate all songs and selected df_types and selection_names for the selected audio_type

    :param all_songs: All songs in our data set
    :param df_combination_and_selection_types: Data combination/selection types (rnd/mv/df)-(all/best/actual-best)
    :param audio_type: Audio method to test (CHF_2017 or one of the MIREX methods)
    :return: String indicating if the evaluation succeeded
    """
    def get_lab_function(song):
        if audio_type == 'CHF_2017':
            return song.full_chordify_chord_labs_path
        else:
            return filehandler.get_full_mirex_chord_labs_path(song, audio_type)

    write_method_evaluations(all_songs, audio_type, get_lab_function)
    for df_type, selection_name in df_combination_and_selection_types:
        # Evaluate this method of combining audio, MIDI and tabs
        method_name = audio_type + '_' + df_type.upper() + '-' + selection_name.upper()
        write_method_evaluations(
            all_songs, method_name,
            lambda song: filehandler.get_data_fusion_path(song.key, df_type, selection_name, audio_type))
    return audio_type + ' was evaluated.'


def evaluate_song_based(all_songs):
    """
    Evaluate all songs in the data set in parallel

    :param all_songs: All songs in the data set
    :return: Nothing; a message is printed when the evaluation has finished
    """
    audio_types = ['CHF_2017'] + filehandler.MIREX_SUBMISSION_NAMES
    df_combination_and_selection_types = [
        ('rnd', 'all'), ('mv', 'all'), ('df', 'all'),
        ('rnd', 'best'), ('mv', 'best'), ('df', 'best'), ('df', 'actual-best'),
        ('rnd', 'alltab'), ('rnd', 'besttab'), ('rnd', 'allmidi'), ('rnd', 'bestmidi'),
        ('mv', 'alltab'), ('mv', 'besttab'), ('mv', 'allmidi'), ('mv', 'bestmidi'),
        ('df', 'alltab'), ('df', 'besttab'), ('df', 'allmidi'), ('df', 'bestmidi')]

    # Serial alternative, kept for reference:
    # for audio_type in audio_types:
    #     _evaluate_audio_type(all_songs, df_combination_and_selection_types, audio_type)

    pool = mp.Pool(mp.cpu_count() - 1)  # use all available cores except one
    for audio_type in audio_types:
        pool.apply_async(_evaluate_audio_type,
                         args=(all_songs, df_combination_and_selection_types, audio_type),
                         callback=print)
    pool.close()
    pool.join()
    print('Evaluation finished!')


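# Why callback=print above: apply_async returns immediately, and the string
# returned by _evaluate_audio_type is handed to the callback in the parent
# process, giving one progress line per finished audio type. A self-contained
# sketch of the same pattern with a hypothetical worker:
def _example_worker(name):
    return name + ' was evaluated.'  # mirrors _evaluate_audio_type's return value


def _example_parallel_pattern():
    pool = mp.Pool(2)
    for name in ['method_a', 'method_b']:  # hypothetical method names
        pool.apply_async(_example_worker, args=(name,), callback=print)
    pool.close()  # no more tasks will be submitted
    pool.join()   # block until all workers have finished

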
def evaluate_midis(all_songs) -> None:
    """
    Evaluate all lab files based on MIDI alignment and chord estimation

    :param all_songs: All songs in the data set
    """
    for segmentation_type in 'bar', 'beat':
        result_csv_path = filehandler.MIDILABS_RESULTS_PATHS[segmentation_type]
        if not path.isfile(result_csv_path):
            # Results were not calculated yet
            with open(result_csv_path, 'w') as write_file:
                for song_key in all_songs:
                    song = all_songs[song_key]
                    for midi_path in song.full_midi_paths:
                        midi_name = filehandler.get_file_name_from_full_path(midi_path)
                        alignment_score = \
                            decibel.import_export.midi_alignment_score_io.read_chord_alignment_score(midi_name)
                        chord_probability = filehandler.read_midi_chord_probability(segmentation_type, midi_name)
                        midi_lab_path = filehandler.get_full_midi_chord_labs_path(midi_name, segmentation_type)
                        # Calculate CSR and write
                        csr, overseg, underseg, seg = evaluate(song.full_ground_truth_chord_labs_path, midi_lab_path)
                        write_file.write('{0};{1};{2};{3};{4};{5};{6};{7};{8}\n'.format(
                            str(song_key), str(song.duration), str(midi_name), str(alignment_score),
                            str(chord_probability), str(csr), str(overseg), str(underseg), str(seg)))


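# The result files written above are semicolon-separated and have no header
# row. A sketch of loading one back with pandas; the column names are
# assumptions inferred from the write order above, not defined by filehandler.
def _example_read_midi_results(segmentation_type='bar'):
    return pandas.read_csv(
        filehandler.MIDILABS_RESULTS_PATHS[segmentation_type], sep=';', header=None,
        names=['song_key', 'duration', 'midi_name', 'alignment_score',
               'chord_probability', 'csr', 'overseg', 'underseg', 'seg'])

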
def evaluate_tabs(all_songs) -> None:
    """
    Evaluate all lab files based on tab parsing and alignment.

    :param all_songs: All songs in our data set.
    """
    result_csv_path = filehandler.TABLABS_RESULTS_PATH
    if not path.isfile(result_csv_path):
        # Results were not calculated yet
        with open(result_csv_path, 'w') as write_file:
            for song_key in all_songs:
                song = all_songs[song_key]
                for tab_path in song.full_tab_paths:
                    tab_write_path = filehandler.get_full_tab_chord_labs_path(tab_path)
                    if filehandler.file_exists(tab_write_path):
                        likelihood, transposition = filehandler.read_log_likelihood(song_key, tab_write_path)
                        csr, overseg, underseg, seg = evaluate(song.full_ground_truth_chord_labs_path,
                                                               tab_write_path)
                        write_file.write('{0};{1};{2};{3};{4};{5};{6};{7};{8}\n'.format(
                            str(song_key), str(song.duration),
                            str(filehandler.get_relative_path(tab_write_path)),
                            str(likelihood), str(transposition),
                            str(csr), str(overseg), str(underseg), str(seg)))