Source code for decibel.audio_tab_aligner.jump_alignment

from typing import Dict

import numpy as np

from decibel.audio_tab_aligner.hmm_parameters import HMMParameters
from decibel.import_export import filehandler
from decibel.import_export.untimed_chord_sequence_io import read_untimed_chord_sequence
from decibel.music_objects.chord import Chord
from decibel.music_objects.chord_alphabet import ChordAlphabet
from decibel.music_objects.chord_vocabulary import ChordVocabulary
from decibel.music_objects.song import Song


def _calculate_altered_transition_matrix(nr_of_chords_in_tab: int, chord_ids: np.array,
                                         is_first_in_line: np.array, is_last_in_line: np.array,
                                         hmm_parameters: HMMParameters,
                                         p_f: float, p_b: float):
    """
    Build the tab-specific transition matrix used by the jump alignment algorithm.

    The states are the chord positions in the tab. From position ``i`` the allowed moves are: staying on ``i``,
    stepping to ``i + 1``, or jumping from the last chord of a line to the first chord of any line. Every allowed
    move starts from the trained chord-to-chord probability in ``hmm_parameters.trans``; jumps are additionally
    scaled by ``p_f`` (target after the source) or ``p_b`` (target before the source). All other entries stay zero
    and each row is rescaled to sum to one.

    :param nr_of_chords_in_tab: Number of chords in the tab file
    :param chord_ids: Numbers of the chords (indexes in the chord_vocabulary), one per tab position
    :param is_first_in_line: Array with a 1 for each chord that is first in its line
    :param is_last_in_line: Array with a 1 for each chord that is last in its line
    :param hmm_parameters: HMMParameters obtained in the training phase (only ``trans`` is read)
    :param p_f: Weight applied to forward jumps
    :param p_b: Weight applied to backward jumps
    :return: Row-normalized (nr_of_chords_in_tab x nr_of_chords_in_tab) transition matrix
    """
    trained_transitions = hmm_parameters.trans
    jump_matrix = np.zeros((nr_of_chords_in_tab, nr_of_chords_in_tab))

    for source in range(nr_of_chords_in_tab):
        source_chord = chord_ids[source]
        source_ends_line = is_last_in_line[source] == 1
        for target in range(nr_of_chords_in_tab):
            chord_transition = trained_transitions[source_chord, chord_ids[target]]
            if target in (source, source + 1):
                # Stay on the same tab position or advance to the next one
                jump_matrix[source, target] = chord_transition
            elif source_ends_line and is_first_in_line[target] == 1:
                # Jump from the end of a line to the start of a line
                jump_weight = p_f if source < target else p_b
                jump_matrix[source, target] = jump_weight * chord_transition

    # Rescale every row so that it sums to one
    for source in range(nr_of_chords_in_tab):
        row = jump_matrix[source]
        jump_matrix[source] = row / sum(row)

    return jump_matrix


def _chord_label_to_chord_str(chord_label: int, alphabet: ChordAlphabet) -> str:
    """
    Convert an integer chord label into its chord string.

    :param chord_label: Chord index in the chord_vocabulary (integer); label 0 is written as 'N'
    :param alphabet: ChordAlphabet whose alphabet_list holds the tab-notation string of every label
    :return: 'N' for label 0, otherwise the string form of the Chord parsed from the alphabet entry
    """
    if chord_label != 0:
        tab_notation = alphabet.alphabet_list[chord_label]
        return str(Chord.from_common_tab_notation_string(tab_notation))
    return 'N'


def _transpose_chord_label(chord_label: int, nr_semitones_higher: int, alphabet: ChordAlphabet) -> int:
    """
    Shift a chord label upwards by a number of semitones.

    Label 0 is returned unchanged. Every other label is split into a block of twelve (the "mode") and a position
    inside that block (the "key"); only the key is rotated, so the mode is preserved.

    :param chord_label: The index of the chord label that needs to be transposed
    :param nr_semitones_higher: Number of semitones to go up (reduced modulo 12, so negative values go down)
    :param alphabet: ChordAlphabet; only its chord_vocabulary_name is inspected
    :return: Index of the transposed chord label
    :raises NotImplementedError: If the label is not 0 and the chord vocabulary is not "MajMin"
    """
    if chord_label == 0:
        return 0

    semitone_shift = nr_semitones_higher % 12

    # TODO Implement for other chord vocabularies (e.g. seventh chords)
    if alphabet.chord_vocabulary_name != 'MajMin':
        raise NotImplementedError('This is not (yet?) supported for chord vocabularies other than "MajMin".')

    zero_based_label = chord_label - 1
    mode = int(zero_based_label / 12)
    key = (zero_based_label % 12 + semitone_shift) % 12
    return 12 * mode + key + 1


def _read_tab_file_path(chords_from_tab_file_path: str, alphabet: ChordAlphabet) -> (int, np.array, np.array, np.array):
    """
    Read the chords of a tab and describe where they sit in their lines.

    :param chords_from_tab_file_path: File that contains chord information
    :param alphabet: ChordAlphabet used to translate each chord string into its index
    :return: (nr_of_chords_in_tab, chord_ids, is_first_in_line, is_last_in_line); the three sequences are empty
        lists when the tab has fewer than 5 chords
    """
    # The .txt file consists of: [line_nr, segment_nr, system_nr, chord_x, chord_str] (UntimedChordSequence)
    tab_items = read_untimed_chord_sequence(chords_from_tab_file_path).untimed_chord_sequence_item_items
    nr_of_chords_in_tab = len(tab_items)
    if nr_of_chords_in_tab < 5:
        # Tabs with fewer than 5 chords are not used
        return nr_of_chords_in_tab, [], [], []

    # Index of every chord in the alphabet
    chord_ids = np.zeros(nr_of_chords_in_tab).astype(int)
    for position, tab_item in enumerate(tab_items):
        chord_ids[position] = alphabet.get_index_of_chord_in_alphabet(
            Chord.from_harte_chord_string(tab_item.chord_str))

    # line_break_after[k] tells whether chord k and chord k + 1 are on different lines
    line_nrs = [tab_item.line_nr for tab_item in tab_items]
    line_break_after = [previous != current for previous, current in zip(line_nrs, line_nrs[1:])]

    # A chord is first in its line if it opens the tab or follows a line break ...
    is_first_in_line = np.zeros(nr_of_chords_in_tab).astype(int)
    is_first_in_line[0] = 1
    is_first_in_line[1:] = line_break_after

    # ... and last in its line if it closes the tab or precedes a line break
    is_last_in_line = np.zeros(nr_of_chords_in_tab).astype(int)
    is_last_in_line[-1] = 1
    is_last_in_line[:-1] = line_break_after

    return nr_of_chords_in_tab, chord_ids, is_first_in_line, is_last_in_line


def train(chord_vocabulary: ChordVocabulary, train_songs: Dict[int, Song]) -> HMMParameters:
    """
    Train the HMM parameters on the training songs for the given chord vocabulary.

    For every song with an audio features file, the per-frame chord labels are counted into the initial-state and
    transition tables (both start at one, so no entry is ever zero), and the 12 feature values of each frame are
    collected per chord to estimate a mean vector and covariance matrix.

    Side effect: ``audio_features_path`` of every song in ``train_songs`` is overwritten with the path returned by
    ``filehandler.get_full_audio_features_path``.

    :param chord_vocabulary: List of chords in our vocabulary
    :param train_songs: Songs for training, indexed by song key
    :return: HMM Parameters
    """
    # Convert the vocabulary to a ChordAlphabet
    alphabet = ChordAlphabet(chord_vocabulary)
    alphabet_size = len(alphabet.alphabet_list)

    # One list of 12-dimensional chroma vectors per chord in the alphabet
    chroma_frames_per_chord = [[] for _ in alphabet.alphabet_list]

    # Transition and initial-state counts start at one
    trans = np.ones((alphabet_size, alphabet_size))
    init = np.ones(alphabet_size)

    # Iterate over the songs; fill chroma_frames_per_chord, init and trans
    for train_song_key, train_song in train_songs.items():
        train_song.audio_features_path = filehandler.get_full_audio_features_path(train_song_key)
        if train_song.audio_features_path == '':
            # No audio features (and labels) for this song: ignore it
            continue

        features = np.load(train_song.audio_features_path)
        chord_index_list = []
        # Columns 1-12 of a frame are the chroma values, column 13 is the chord label
        for frame in features:
            chroma = frame[1:13].astype(float)
            chord_index = alphabet.get_index_of_chord_in_alphabet(Chord.from_harte_chord_string(frame[13]))
            chord_index_list.append(chord_index)
            chroma_frames_per_chord[chord_index].append(chroma)

        if not chord_index_list:
            # A features file without frames has no first chord and no transitions to count
            continue

        # Count the first chord and every chord-to-chord transition
        init[chord_index_list[0]] += 1
        for current_chord, next_chord in zip(chord_index_list, chord_index_list[1:]):
            trans[current_chord, next_chord] += 1

    # Normalize init to sum to one and every row of trans to sum to one
    init = init / init.sum()
    trans = trans / trans.sum(axis=1, keepdims=True)

    # Calculate mean and covariance matrices
    obs_mu = np.zeros((alphabet_size, 12))
    obs_sigma = np.zeros((alphabet_size, 12, 12))
    for chord_index, chroma_frames in enumerate(chroma_frames_per_chord):
        # NOTE(review): a chord without any training frame gives an empty array here, for which
        # np.mean(..., axis=1) raises - confirm that every chord occurs in the training data
        chroma_matrix = np.array(chroma_frames).T
        obs_mu[chord_index] = np.mean(chroma_matrix, axis=1)
        obs_sigma[chord_index] = np.cov(chroma_matrix, ddof=0)

    # Calculate additional values so we can calculate the emission probability more easily
    twelve_log_two_pi = 12 * np.log(2 * np.pi)
    log_det_sigma = np.zeros(alphabet_size)
    sigma_inverse = np.zeros(obs_sigma.shape)
    for chord_index in range(alphabet_size):
        log_det_sigma[chord_index] = np.log(np.linalg.det(obs_sigma[chord_index]))
        # Stored in a plain ndarray, so no matrix wrapper is needed (np.mat no longer exists in NumPy 2)
        sigma_inverse[chord_index] = np.linalg.pinv(obs_sigma[chord_index])

    return HMMParameters(alphabet=alphabet, trans=trans, init=init, obs_mu=obs_mu, obs_sigma=obs_sigma,
                         log_det_sigma=log_det_sigma, sigma_inverse=sigma_inverse,
                         twelve_log_two_pi=twelve_log_two_pi, trained_on_keys=list(train_songs.keys()))
def jump_alignment(chords_from_tab_file_path: str, audio_features_path: str, lab_write_path: str,
                   hmm_parameters: HMMParameters,
                   p_f: float = 0.05, p_b: float = 0.05) -> (float, int):
    """
    Calculate the optimal alignment between tab file and audio

    For each of the 12 semitone transpositions of the tab chords, a Viterbi search finds the most likely path of
    tab positions through the beats. The path of the best-scoring transposition is written to ``lab_write_path``.

    :param chords_from_tab_file_path: Path to chords from tab file
    :param audio_features_path: Path to audio features (column 0 is read as the beat times, columns 1-12 as the
        features that are scored against the chord models)
    :param lab_write_path: Path to the file to write the chord labels to
    :param hmm_parameters: HMMParameters obtained in the training phase
    :param p_f: Forward probability
    :param p_b: Backward probability
    :return: best likelihood and best transposition; (None, 0) without writing a file when the tab has fewer than
        5 chords, the audio has no beats, or no transposition reaches a likelihood above minus infinity
    """
    # Load chord information from chords_from_tab_file_path
    nr_of_chords_in_tab, chord_ids, is_first_in_line, is_last_in_line = \
        _read_tab_file_path(chords_from_tab_file_path, hmm_parameters.alphabet)
    if nr_of_chords_in_tab < 5:
        return None, 0

    # Load the audio features once; they provide both the beat times and the feature vectors
    audio_features = np.load(audio_features_path)
    beat_times = audio_features[:, 0]
    features = audio_features[:, 1:13].astype(float)
    nr_beats = features.shape[0]
    if nr_beats == 0:
        # Nothing to align against
        return None, 0

    # Calculate the emission probability matrix for this song: Gaussian log-density of every beat for every chord
    alphabet_size = len(hmm_parameters.alphabet.alphabet_list)
    log_emission_probability_matrix = np.zeros((alphabet_size, nr_beats))
    for i in range(alphabet_size):
        centered = features - hmm_parameters.obs_mu[i]
        sigma_inverse = np.asarray(hmm_parameters.sigma_inverse[i])
        # Row-wise quadratic form (x - mu) * sigma^-1 * (x - mu)^T for all beats at once
        quadratic_form = np.sum(np.dot(centered, sigma_inverse) * centered, axis=1)
        log_emission_probability_matrix[i] = \
            (hmm_parameters.log_det_sigma[i] + quadratic_form + hmm_parameters.twelve_log_two_pi) / -2

    best_transposition, best_tr, best_last_chord, best_likelihood = -1, None, -1, -float('inf')
    for semitone_transposition in range(12):
        # Transpose
        transposed_chord_ids = \
            np.array([_transpose_chord_label(c_i, semitone_transposition, hmm_parameters.alphabet)
                      for c_i in chord_ids])

        # Fill altered transition matrix
        altered_transition_matrix = _calculate_altered_transition_matrix(nr_of_chords_in_tab, transposed_chord_ids,
                                                                         is_first_in_line, is_last_in_line,
                                                                         hmm_parameters, p_f, p_b)
        # Take the logarithm once per transposition; forbidden transitions become -inf so they never win
        with np.errstate(divide='ignore', invalid='ignore'):
            log_transitions = np.where(altered_transition_matrix > 0, np.log(altered_transition_matrix), -np.inf)

        # Emission scores of the chord at every tab position: shape (nr_of_chords_in_tab, nr_beats)
        tab_log_emissions = log_emission_probability_matrix[transposed_chord_ids]

        # Initialize travel grid. The backpointers hold tab positions, so they need a dtype that can index any
        # tab length (uint8 would only be able to hold positions up to 255)
        g = np.zeros((nr_beats, nr_of_chords_in_tab))
        tr = np.zeros((nr_beats, nr_of_chords_in_tab), dtype=np.intp)
        g[0] = tab_log_emissions[:, 0] + np.log(hmm_parameters.init[transposed_chord_ids])
        for i in range(1, nr_beats):
            # candidates[c, j]: score of being at position c on the previous beat and moving to position j
            candidates = g[i - 1][:, np.newaxis] + log_transitions
            # argmax returns the first maximum, i.e. the lowest predecessor position on ties
            tr[i] = np.argmax(candidates, axis=0)
            g[i] = tab_log_emissions[:, i] + np.max(candidates, axis=0)

        # Find log likelihood and best last chord
        last_chord = int(np.argmax(g[-1]))
        log_likelihood = g[-1, last_chord]

        # Save the backpointers only if the transposition had the best likelihood until now
        if log_likelihood > best_likelihood:
            best_transposition, best_tr, best_last_chord, best_likelihood = \
                semitone_transposition, tr, last_chord, log_likelihood

    if best_tr is None:
        # No transposition produced a likelihood above -inf, so there is no path to export
        return None, 0

    # Transpose to the best transposition
    transposed_chord_ids = np.array([_transpose_chord_label(c_i, best_transposition, hmm_parameters.alphabet)
                                     for c_i in chord_ids])

    # Derive the Viterbi path by following the backpointers from the last beat to the first
    viterbi_path_reversed = [best_last_chord]
    for b in range(nr_beats - 1, 0, -1):
        viterbi_path_reversed.append(best_tr[b, viterbi_path_reversed[-1]])
    viterbi_path = transposed_chord_ids[viterbi_path_reversed[::-1]]

    # Export the Viterbi path as "<start> <end> <chord>" lines
    # NOTE(review): the string concatenation below assumes the features file stores beat times as strings - confirm
    # NOTE(review): chord changes on the last two beats are not checked and the final segment is labelled with the
    # chord of the second-to-last beat rather than the chord that started the segment - confirm this is intended
    beat_start = '0'
    last_chord = viterbi_path[0]
    with open(lab_write_path, 'w') as write_file:
        for b in range(len(beat_times) - 2):
            if viterbi_path[b] != last_chord:
                chord_str = _chord_label_to_chord_str(viterbi_path[b - 1], hmm_parameters.alphabet)
                write_file.write(beat_start + ' ' + beat_times[b] + ' ' + chord_str + '\n')
                beat_start = beat_times[b]
                last_chord = viterbi_path[b]
        if beat_times[len(beat_times) - 2] != beat_start:
            chord_str = _chord_label_to_chord_str(viterbi_path[len(beat_times) - 2], hmm_parameters.alphabet)
            write_file.write(beat_start + ' ' + beat_times[len(beat_times) - 1] + ' ' + chord_str)

    return best_likelihood, best_transposition
def test_single_song(song: Song, hmm_parameters: HMMParameters) -> None:
    """
    Run jump alignment for every tab of the song that has no lab file yet.

    Each successful alignment writes its chord labels to the tab's lab file and registers the log-likelihood and
    transposition through ``filehandler.write_log_likelihood``. Tabs whose lab file already exists are skipped.

    :param song: Song for which we estimate tab-based chords
    :param hmm_parameters: Parameters of the trained HMM
    """
    audio_features_path = filehandler.get_full_audio_features_path(song.key)

    for full_tab_path in song.full_tab_paths:
        tab_chord_path = filehandler.get_chords_from_tab_filename(full_tab_path)
        tab_write_path = filehandler.get_full_tab_chord_labs_path(full_tab_path)
        if filehandler.file_exists(tab_write_path):
            # Already aligned earlier
            continue

        log_likelihood, transposition_semitone = \
            jump_alignment(tab_chord_path, audio_features_path, tab_write_path, hmm_parameters)
        if log_likelihood is None:
            # No alignment was found for this tab
            continue

        # We found an alignment, write this to our log-likelihoods file
        if not tab_write_path.startswith(filehandler.DATA_PATH):
            # Only reported; the prefix is still cut off below
            print('WRITING ERROR')
        # Remove start of path
        relative_write_path = tab_write_path[len(filehandler.DATA_PATH) + 1:]
        filehandler.write_log_likelihood(song.key, relative_write_path, log_likelihood, transposition_semitone)