Source code for scphylo.tl.partition_function._partition_function

import datetime
import time

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

import scphylo as scp
from scphylo.tl.partition_function._pf import (
    get_samples,
    get_samples_info,
    process_samples,
)


def partition_function(df_input, alpha, beta, n_samples, n_batches, muts, cells):
    """Calculate the probability of a mutation seeding particular cells.

    Parameters
    ----------
    df_input : :class:`pandas.DataFrame`
        Input genotype matrix. Its index is matched against ``cells`` and
        its columns against ``muts``. Entries equal to ``3`` are given a
        probability of ``0.5`` (presumably the missing-value code).
    alpha : float
        False positive error rate.
    beta : float
        False negative error rate.
    n_samples : int
        Number of samples to get from the distribution (suggest: 1000)
    n_batches : int
        Number of batches to repeat the experiment (suggest: 100)
    muts : list
        The list of mutations
    cells : list
        The list of cells

    Returns
    -------
    :class:`pandas.DataFrame`
        A table of probabilities for every mutation and every batch. Rows
        are indexed by ``muts``, columns are ``range(n_batches)`` and all
        values are cast to float. When ``muts`` is empty, the empty table
        is returned without sampling.
    """
    # Local import keeps this block self-contained; only used to bound the
    # number of parallel workers below.
    import os

    df_output = pd.DataFrame(None, index=muts, columns=range(n_batches))
    s_time = time.time()

    # Per-entry probability matrix built from the observed genotypes and the
    # two error rates.
    I_mtr = df_input.values
    t1 = I_mtr * (1 - beta) / (alpha + 1 - beta)
    t2 = (1 - I_mtr) * beta / (beta + 1 - alpha)
    P = t1 + t2
    # Entries coded as 3 carry no information either way.
    P[I_mtr == 3] = 0.5

    # Positional indices of the requested mutations/cells inside df_input.
    my_muts = np.where(df_input.columns.isin(muts))[0]
    my_cells = np.where(df_input.index.isin(cells))[0]
    # NOTE(review): whether scp.logg.error aborts execution is not visible
    # from this file - confirm; if it only logs, an unknown mutation fails
    # later with an IndexError inside run().
    if len(my_muts) != len(muts):
        scp.logg.error("bad muts choise!")
    if len(my_cells) != len(cells):
        scp.logg.error("bad cells choise!")

    # Nothing to estimate: joblib rejects n_jobs=0, so return the (empty)
    # table instead of crashing inside Parallel.
    if len(muts) == 0:
        return df_output.astype(float)

    # Samples are drawn once and shared by every mutation.
    _, subtrees_list, tree_our_prob_list = get_samples(P, n_samples)

    def run(mut):
        # Column position of this mutation in the input matrix.
        my_mut = np.where(df_input.columns == mut)[0][0]
        pf_cond_list, tree_origin_prob_list, _, _, _ = get_samples_info(
            P, my_cells, my_mut, n_samples, subtrees_list
        )
        estimates = process_samples(
            pf_cond_list, tree_origin_prob_list, tree_our_prob_list, n_batches
        )
        return mut, estimates

    # One task per mutation, but never more workers than available CPUs, so
    # long mutation lists do not oversubscribe the machine.
    n_jobs = min(len(muts), os.cpu_count() or 1)
    output = Parallel(n_jobs=n_jobs)(delayed(run)(mut) for mut in muts)

    for mut, estimates in output:
        df_output.loc[mut] = estimates

    e_time = time.time()
    running_time = e_time - s_time
    scp.logg.info(f"elapsed time: {datetime.timedelta(seconds=running_time)}")

    return df_output.astype(float)