import time
import pandas as pd
import scphylo as scp
from scphylo.external.gpps import gpps_hc, gpps_ilp
def gpps(
    df_input,
    alpha,
    beta,
    k_dollo=0,
    max_del=-1,
    neighbor_size=30,
    n_iters=100,
    time_limit=86400,
    n_threads=1,
):
    """Solving using gpps.

    An ILP-based approach for inferring cancer progression with mutation losses
    from single cell data :cite:`gpps`.

    The input matrix is first passed to the ILP step (``gpps_ilp``); its result
    is then handed, together with the input, to the hill-climbing step
    (``gpps_hc``). The output of the hill-climbing step is returned with the
    row and column labels of ``df_input``.

    Parameters
    ----------
    df_input : :class:`pandas.DataFrame`
        Input genotype matrix in which rows are cells and columns are mutations.
        Values inside this matrix show the presence (1), absence (0) and missing
        entries (3).
    alpha : :obj:`float`
        False positive error rate.
    beta : :obj:`float`
        False negative error rate.
    k_dollo : :obj:`int`, optional
        k for Dollo model, by default 0
    max_del : :obj:`int`, optional
        Maximum number of deletions allowed, by default -1
    neighbor_size : :obj:`int`, optional
        Hill climbing neighborhood size, by default 30
    n_iters : :obj:`int`, optional
        Hill climbing maximum iterations, by default 100
    time_limit : :obj:`int`, optional
        Time limit (in seconds), by default 86400
    n_threads : :obj:`int`, optional
        Number of threads, by default 1

    Returns
    -------
    :class:`pandas.DataFrame`
        A conflict-free matrix in which rows are cells and columns are mutations.
        Values inside this matrix show the presence (1) and absence (0).
    """
    scp.logg.info(
        f"running gpps with alpha={alpha}, beta={beta}, k_dollo={k_dollo}, "
        f"max_del={max_del}, neighbor_size={neighbor_size}, n_iters={n_iters}, "
        f"time_limit={time_limit}, n_threads={n_threads}"
    )

    # Remember the labels: the solvers only receive the raw value array.
    cells = list(df_input.index)
    snvs = list(df_input.columns)

    # gpps uses the opposite naming from this wrapper: its ``alpha`` is the
    # false-negative rate and its ``beta`` the false-positive rate, so the two
    # values are swapped on the way in to both steps.
    gpps_alpha = beta
    gpps_beta = alpha

    s_time = time.time()

    # Step 1: ILP solution.
    ilp_matrix = gpps_ilp(
        df_input.values,
        alpha=gpps_alpha,
        beta=gpps_beta,
        k_dollo=k_dollo,
        max_del=max_del,
        time_limit=time_limit,
        n_threads=n_threads,
    )
    ilp_matrix = pd.DataFrame(ilp_matrix)

    # Step 2: hill climbing, given the input and the ILP result.
    output_matrix = gpps_hc(
        df_input.values,
        ilp_matrix,
        alpha=gpps_alpha,
        beta=gpps_beta,
        k_dollo=k_dollo,
        mut_names=snvs,
        ns=neighbor_size,
        mi=n_iters,
    )

    e_time = time.time()
    running_time = e_time - s_time

    # Re-attach the original cell / mutation labels to the solver output.
    df_output = pd.DataFrame(output_matrix)
    df_output.columns = snvs
    df_output.index = cells
    df_output.index.name = "cellIDxmutID"

    # Report statistics using the caller's (un-swapped) error rates.
    scp.ul.stat(df_input, df_output, alpha, beta, running_time)

    return df_output