Source code for scphylo.tl.solver._sphyr
import os
import time
import pandas as pd
import scphylo as scp
[docs]def sphyr(
df_input,
alpha,
beta,
n_restarts=10,
n_threads=1,
time_limit=None,
n_cell_clusters=10,
n_mut_clusters=15,
):
"""Solving using SPhyR.
Tumor phylogeny estimation from single-cell sequencing data under loss and error
:cite:`SPhyR`.
Parameters
----------
df_input : :class:`pandas.DataFrame`
Input genotype matrix in which rows are cells and columns are mutations.
Values inside this matrix show the presence (1), absence (0) and missing
entires (3).
alpha : :obj:`float`
False positive error rate.
beta : :obj:`float`
False negative error rate.
n_restarts : :obj:`int`, optional
Number of threads, by default 10
n_threads : :obj:`int`, optional
Number of threads, by default 1
time_limit : :obj:`int`, optional
Time limit (in seconds), by default None
n_cell_clusters : :obj:`int`, optional
Number of cell clusters, by default 10
n_mut_clusters : :obj:`int`, optional
Number of mutation clusters, by default 15
Returns
-------
:class:`pandas.DataFrame`
A conflict-free matrix in which rows are cells and columns are mutations.
Values inside this matrix show the presence (1) and absence (0).
"""
executable = scp.ul.executable("sphyr_kDPFC", "SPhyR")
scp.logg.info(
f"running SPhyR with alpha={alpha}, beta={beta}, n_restarts={n_restarts}, "
f"n_threads={n_threads}, time_limit={time_limit}, "
f"n_cell_clusters={n_cell_clusters}, n_mut_clusters={n_mut_clusters}"
)
tmpdir = scp.ul.tmpdirsys(suffix=".sphyr")
# tmpdir = scp.ul.tmpdir(suffix=".sphyr")
with open(f"{tmpdir.name}/sphyr.input", "a") as fout:
fout.write(f"{df_input.shape[0]} #cells\n{df_input.shape[1]} #SNVs\n")
df_input.replace(3, -1).to_csv(fout, sep=" ", header=None, index=None)
with open(f"{tmpdir.name}/sphyr.cellnames", "w") as fout:
fout.write("\n".join(df_input.index) + "\n")
with open(f"{tmpdir.name}/sphyr.mutnames", "w") as fout:
fout.write("\n".join(df_input.columns) + "\n")
cmd = (
f"{executable} "
f"{tmpdir.name}/sphyr.input "
f"-a {alpha} "
f"-b {beta} "
f"-N {n_restarts} "
f"-t {n_threads} "
f"-lC {n_mut_clusters} "
f"-lT {n_cell_clusters} "
f"-T {time_limit if time_limit is not None else -1} "
"-k 0 "
f"> {tmpdir.name}/sphyr.output "
f"2> {tmpdir.name}/sphyr.log"
)
s_time = time.time()
os.system(cmd)
e_time = time.time()
running_time = e_time - s_time
df_output = pd.read_csv(
f"{tmpdir.name}/sphyr.output",
sep=" ",
skiprows=[0, 1],
header=None,
)
df_output.index = pd.read_csv(f"{tmpdir.name}/sphyr.cellnames", header=None)[0]
df_output.columns = pd.read_csv(f"{tmpdir.name}/sphyr.mutnames", header=None)[0]
df_output.index.name = "cellIDxmutID"
tmpdir.cleanup()
scp.ul.stat(df_input, df_output, alpha, beta, running_time)
return df_output