# Source code for congas.utils

""" Utils class

A set of utility functions to automatically run an entire inference cycle, and to plot and save the results.

"""

import matplotlib.pyplot as plt
import pandas as pd
import os
from congas.Interface import Interface
import numpy as np
from pyro.optim import ClippedAdam
from pyro.infer import SVI, TraceEnum_ELBO
import torch

def plot_loss(loss, save=False, output="run1"):
    """ Plot the loss trace of an inference run

    Draws the loss values on the current matplotlib figure, titled "ELBO",
    and optionally saves the figure as a PNG.

    Args:
        loss: sequence of loss values, passed as-is to ``plt.plot``
        save: if true, save the figure to ``<output>_ELBO.png``
        output: filename prefix used when saving

    """
    plt.plot(loss)
    plt.title("ELBO")
    plt.xlabel("step")
    plt.ylabel("loss")
    if save:
        plt.savefig(output + "_ELBO.png")


def dict_to_tensor(dict):
    """ Convert the values of a dictionary to tensors, in place

    Every value that is not already a torch tensor is replaced by
    ``torch.tensor(value)``; values that are tensors are left untouched.
    The dictionary is modified in place and nothing is returned.

    Args:
        dict: dictionary whose values should become tensors
            (the name shadows the builtin but is kept for keyword callers)

    """
    # Only existing keys are reassigned, so the dictionary never changes
    # size while it is being iterated.
    for key, value in dict.items():
        if not torch.is_tensor(value):
            dict[key] = torch.tensor(value)

def run_analysis(data_dict, model, optim=ClippedAdam, elbo=TraceEnum_ELBO, inf_type=SVI,
                 steps=500, lr=0.01, param_dict=None, MAP=True, seed=3):

    """ Run an entire analysis with the minimum amount of parameters

    Simple function to run an entire step of inference and get the learned parameters back. It is less
    customizable than using the :class:`~congas.core.Interface` directly, but should still satisfy most users.
    The R interface offers an even easier entry point.

    Note that the values of ``data_dict`` and ``param_dict`` are converted to tensors in place.

    Args:
        data_dict: dictionary with the input data
        model: a model from one in congas.models
        optim: an optimizer from pyro.optim
        elbo: a loss function from pyro.infer
        inf_type: SVI or NUTS (Hamiltonian MCMC)
        steps: number of inference steps
        lr: learning rate
        param_dict: parameters for the model, look at the model documentation if you want to change them;
            ``None`` (the default) means an empty dictionary
        MAP: perform MAP over the last layer of random variable in the model or learn the parameters of the distribution
        seed: seed for pyro.set_rng_seed

    Returns:
        dict: dictionary of parameters:value
        list: loss for every time step, as returned by ``Interface.run``

    """

    # A fresh dict per call: a literal `{}` default would be shared between
    # calls and handed to the Interface, which may keep or modify it.
    if param_dict is None:
        param_dict = {}

    interface = Interface(model, optim, elbo, inf_type)

    dict_to_tensor(data_dict)
    dict_to_tensor(param_dict)
    interface.initialize_model(data_dict)
    interface.set_model_params(param_dict)

    loss = interface.run(steps=steps, seed=seed, param_optimizer={'lr': lr}, MAP=MAP)
    parameters = interface.learned_parameters()

    return parameters, loss

def load_simulation_seg(dir, prefix):

    """ Read data from companion R package simulation

    Reads the two CSV files ``<prefix>_data.csv`` and ``<prefix>_cnv.csv`` from ``dir`` and packs their
    content into a dictionary of tensors.

    Args:
        dir: directory where the simulation files are stored
        prefix: common filename prefix of the simulation files

    Returns:
        dict: with keys ``"data"`` (the data table, transposed, as float32), ``"pld"`` (the ``ploidy_real``
        column as float32), ``"segments"`` (number of rows of the transposed data) and ``"mu"`` (the ``mu`` column)

    """

    # os.path.join keeps an empty `dir` relative instead of pointing at the filesystem root.
    data = pd.read_csv(os.path.join(dir, prefix + "_data.csv"))
    cnv = pd.read_csv(os.path.join(dir, prefix + "_cnv.csv"))

    data = torch.tensor(data.values, dtype=torch.float32).t()
    segments = data.shape[0]

    # Hand torch plain numpy arrays rather than pandas Series objects.
    ploidy = torch.tensor(cnv["ploidy_real"].to_numpy(), dtype=torch.float32)
    mu = torch.tensor(cnv["mu"].to_numpy())

    return {"data": data, "pld": ploidy, "segments": segments, "mu": mu}





def write_results(params, prefix, new_dir=False, dir_pref=None):
    """ Write parameters

    This function writes every parameter to its own tab-delimited text file named
    ``<prefix>_<parameter name>.txt``, optionally inside a dedicated directory.

    Args:
        params: parameters dictionary
        prefix: prefix to append to the filenames
        new_dir: create a new directory (or reuse an existing one) for the output files
        dir_pref: name of the directory, required when ``new_dir`` is true

    Raises:
        TypeError: if ``new_dir`` is true but ``dir_pref`` is not given

    """

    if new_dir:
        if dir_pref is None:
            raise TypeError("dir_pref must be given when new_dir is True")
        try:
            # makedirs also creates missing parent directories.
            os.makedirs(dir_pref)
        except FileExistsError:
            print("Directory already existing, saving there", flush=True)

        # Join on the directory itself so that an absolute dir_pref writes into
        # the directory that was just created, not into "./<dir_pref>".
        out_prefix = os.path.join(dir_pref, prefix + "_")
    else:
        out_prefix = prefix + "_"

    for name, value in params.items():
        np.savetxt(out_prefix + name + ".txt", value, delimiter="\t")

def log_sum_exp(args):
    """ Numerically stable log-sum-exp over the first dimension

    Computes ``log(sum(exp(args)))`` reducing dimension 0.

    Args:
        args: tensor whose first dimension is reduced

    Returns:
        tensor: the input with dimension 0 reduced away

    """
    # torch.logsumexp applies the same max-shift trick, and returns -inf
    # rather than NaN when every entry of a slice is -inf.
    return torch.logsumexp(args, dim=0)