Source code for musisep.dictsep.main

#!python3

"""
Wrapper for the dictionary learning algorithm.  When invoked, the audio
sources in the supplied audio file are separated.
"""

from __future__ import absolute_import, division, print_function

import numpy as np
import os.path
import pickle

from ..audio import spect
from ..audio import wav
from ..audio import performance
from . import dictlearn

def main(mixed_soundfile, orig_soundfiles, inst_num, tone_num, pexp, qexp,
         har, sigmas, sampdist, spectheight, logspectheight,
         minfreq, maxfreq, out_name, runs, lifetime, num_dicts, mask,
         plot_range):
    """
    Wrapper function for the dictionary learning algorithm.

    The mixed recording is read, its log-frequency spectrogram is computed
    (or loaded from ``output/<out_name>.npy`` if that file exists), and then
    `num_dicts` dictionaries are trained with the random seeds
    ``0 .. num_dicts-1``.  For every seed the intermediate results are
    cached under ``output/<out_name>-<seed>-dict.npy`` and
    ``output/<out_name>-<seed>-spect.pkl`` and the resynthesized audio is
    written to ``output/<out_name>-<seed>-synth*.wav``.  If the isolated
    tracks are supplied, the performance measures are printed for every seed
    and averaged over all seeds at the end.

    Parameters
    ----------
    mixed_soundfile : string
        Name of the mixed input file
    orig_soundfiles : list of string or NoneType
        Names of the files with the isolated instrument tracks or None
    inst_num : int
        Number of instruments
    tone_num : int
        Maximum number of simultaneous tones
    pexp : float
        Exponent for the addition of sinusoids
    qexp : float
        Exponent to be applied on the spectrum
    har : int
        Number of harmonics
    sigmas : float
        Number of standard deviations after which to cut the window/kernel
    sampdist : int
        Time intervals to sample the spectrogram
    spectheight : int
        Height of the linear-frequency spectrogram
    logspectheight : int
        Height of the log-frequency spectrogram
    minfreq : float
        Minimum frequency in Hz to be represented (included)
    maxfreq : float
        Maximum frequency in Hz to be represented (excluded)
    out_name : string
        Prefix for the file names (all files are placed in ``output/``)
    runs : int
        Number of training iterations to perform
    lifetime : int
        Number of steps after which to renew the dictionary
    num_dicts : int
        Number of different dictionaries to generate and train
    mask : bool
        Whether to apply spectral masking
    plot_range : slice or NoneType
        Part of the spectrogram to plot; no images are written if None
    """

    signal, samprate = wav.read(mixed_soundfile)

    orig_spectrum = spect.spectrogram(
        signal, spectheight, sigmas, sampdist)[:spectheight, :]
    if plot_range is not None:
        spect.spectwrite('output/{}-orig.png'.format(out_name),
                         orig_spectrum[:spectheight, plot_range])

    if orig_soundfiles is None:
        orig_signals = None
        orig_spectrums = None
    else:
        orig_signals = np.asarray([wav.read(f)[0] for f in orig_soundfiles])
        # The loop variable must not be called `os`: under Python 2 it
        # would leak out of the comprehension and shadow the `os` module
        # that is needed below.
        orig_spectrums = [spect.spectrogram(
            orig_signal, spectheight, sigmas, sampdist)[:spectheight, :]
            for orig_signal in orig_signals]

    fsigma = np.pi/sigmas

    # The log-frequency spectrogram only depends on the input, so it is
    # cached on disk and shared between all seeds.
    logspect_file = 'output/{}.npy'.format(out_name)
    if os.path.exists(logspect_file):
        logspect = np.load(logspect_file)
    else:
        logspect, linspect = spect.logspect_pursuit(signal, spectheight,
                                                    sigmas, sampdist, None,
                                                    minfreq/samprate,
                                                    maxfreq/samprate,
                                                    logspectheight, fsigma)
        np.save(logspect_file, logspect)

        if plot_range is not None:
            spect.spectwrite('output/{}-log.png'.format(out_name),
                             logspect[:, plot_range])
            spect.spectwrite('output/{}-lin.png'.format(out_name),
                             linspect[:, plot_range])

    audio_measures = []
    spectrum_measures = []

    for r in range(num_dicts):
        print("seed: {}".format(r))
        # Per-seed prefix derived from the caller's prefix (previously
        # hard-coded to 'mozart', which ignored `out_name`).
        run_name = '{}-{}'.format(out_name, r)
        np.random.seed(r)

        dict_file = 'output/{}-dict.npy'.format(run_name)
        if os.path.exists(dict_file):
            inst_dict = np.load(dict_file)
        else:
            # NOTE(review): twice `inst_num` is requested from the learner;
            # the reason is not visible in this file -- confirm against
            # dictlearn.learn_spect_dict.
            inst_dict = dictlearn.learn_spect_dict(
                logspect, fsigma, tone_num, inst_num*2, pexp, qexp,
                har, logspectheight, minfreq, maxfreq, runs, lifetime)
            np.save(dict_file, inst_dict)

        print(inst_dict)

        spect_file = 'output/{}-spect.pkl'.format(run_name)
        if os.path.exists(spect_file):
            # NOTE: pickle must only be used on trusted files; this cache
            # is expected to have been written by a previous run below.
            with open(spect_file, 'rb') as cache:
                [dict_spectrum, inst_spectrums,
                 dict_spectrum_lin, inst_spectrums_lin] = pickle.load(cache)
        else:
            (dict_spectrum, inst_spectrums,
             dict_spectrum_lin, inst_spectrums_lin) = \
                dictlearn.synth_spect(
                    logspect, tone_num, inst_dict, fsigma,
                    spectheight, pexp, qexp,
                    minfreq/samprate, maxfreq/samprate)
            with open(spect_file, 'wb') as cache:
                pickle.dump([dict_spectrum, inst_spectrums,
                             dict_spectrum_lin, inst_spectrums_lin],
                            cache)

        # Honor the `mask` switch (masking used to be applied
        # unconditionally, leaving the parameter without effect).
        if mask:
            inst_spectrums_lin, mask_spect = dictlearn.mask_spectrums(
                inst_spectrums_lin, orig_spectrum)
            dict_spectrum_lin = dict_spectrum_lin * mask_spect

        if plot_range is not None:
            spect.spectwrite('output/{}-synth.png'.format(run_name),
                             dict_spectrum[:, plot_range])
            spect.spectwrite('output/{}-synth-lin.png'.format(run_name),
                             dict_spectrum_lin[:, plot_range])
            for i, inst_spectrum in enumerate(inst_spectrums):
                spect.spectwrite(
                    'output/{}-synth{}.png'.format(run_name, i),
                    inst_spectrum[:, plot_range])
                spect.spectwrite(
                    'output/{}-synth{}-lin.png'.format(run_name, i),
                    inst_spectrums_lin[i][:, plot_range])

        siglen = signal.size
        synth_signals = np.zeros((inst_num, siglen))
        audio, _ = spect.synth_audio(dict_spectrum_lin, siglen,
                                     sigmas, sampdist, 1, signal)
        wav.write('output/{}-synth.wav'.format(run_name), audio, samprate)
        for i, inst_spectrum_lin in enumerate(inst_spectrums_lin):
            audio, _ = spect.synth_audio(inst_spectrum_lin,
                                         siglen, sigmas, sampdist, 1,
                                         signal)
            synth_signals[i, :] = audio
            wav.write('output/{}-synth{}.wav'.format(run_name, i),
                      audio, samprate)

        if orig_signals is not None:
            _, ap = performance.select_perm(*performance.measures(
                synth_signals, orig_signals))
            audio_measures.append(ap)
            print(ap)

            # The loop variables must not be called `spect`: under Python 2
            # they would leak and shadow the `spect` module for the next
            # seed.
            _, sp = performance.select_perm(*performance.measures(
                np.vstack([np.ravel(s) for s in inst_spectrums_lin]),
                np.vstack([np.ravel(s) for s in orig_spectrums])))
            spectrum_measures.append(sp)
            print(sp)

    if orig_signals is not None:
        print("Global measures:")
        print(np.mean(np.asarray(audio_measures), axis=0))
        print(np.mean(np.asarray(spectrum_measures), axis=0))

if __name__ == '__main__':
    # Example invocation: separate the two-instrument Mozart mix and
    # evaluate against the isolated recorder and violin tracks.
    demo_settings = dict(
        # Input files
        mixed_soundfile='input/mozart/mix.wav',
        orig_soundfiles=['input/mozart/recorder.wav',
                         'input/mozart/violin.wav'],
        # Model parameters
        inst_num=2,
        tone_num=2,
        pexp=2,
        qexp=1/2,
        har=25,
        # Spectrogram parameters
        sigmas=6,
        sampdist=256,
        spectheight=6*1024,
        logspectheight=1024,
        minfreq=20,
        maxfreq=20480,
        # Training and output
        out_name='mozart',
        runs=100000,
        lifetime=500,
        num_dicts=10,
        mask=True,
        plot_range=slice(0, 1580),
    )
    main(**demo_settings)
Source code for musisep.dictsep.__main__

Source code for musisep.dictsep.main