Source code for scphylo.tl.solver._phiscs

import math
import time
from itertools import combinations

import numpy as np
import pandas as pd
from pysat.examples.rc2 import RC2
from pysat.formula import WCNF
from scipy.stats import binom

import scphylo as scp
from scphylo.external._betabinom import pmf_BetaBinomial


[docs]def phiscsb(df_input, alpha, beta, experiment=False): """Solving using PhISCS-B (only in single-cell mode, no bulk). a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data :cite:`PhISCS`. Parameters ---------- df_input : :class:`pandas.DataFrame` Input genotype matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1), absence (0) and missing entires (3). alpha : :obj:`float` False positive error rate. beta : :obj:`float` False negative error rate. experiment : :obj:`bool`, optional Is in the experiment mode (the log won't be shown), by default False Returns ------- :class:`pandas.DataFrame` A conflict-free matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1) and absence (0). """ if not experiment: scp.logg.info(f"running PhISCS-B with alpha={alpha}, beta={beta}") cells = list(df_input.index) snvs = list(df_input.columns) df_input = df_input.replace("?", 3) df_input = df_input.astype(int) I_mtr = df_input.values rc2 = RC2(WCNF()) num_cells = len(cells) num_mutations = len(snvs) Y = np.empty((num_cells, num_mutations), dtype=np.int64) numVarY = 0 for i in range(num_cells): for j in range(num_mutations): numVarY += 1 Y[i, j] = numVarY B = np.empty((num_mutations, num_mutations, 2, 2), dtype=np.int64) numVarB = 0 for p in range(num_mutations): for q in range(p + 1, num_mutations): for i, j in [(0, 1), (1, 0), (1, 1)]: numVarB += 1 B[p, q, i, j] = numVarY + numVarB Z = np.empty((num_cells, num_mutations), dtype=np.int64) numVarZ = 0 for i in range(num_cells): for j in range(num_mutations): if I_mtr[i, j] == 0: numVarZ += 1 Z[i, j] = numVarY + numVarB + numVarZ for p in range(num_mutations): for q in range(p + 1, num_mutations): rc2.add_clause([-B[p, q, 0, 1], -B[p, q, 1, 0], -B[p, q, 1, 1]]) for i in range(num_cells): rc2.add_clause([-Y[i, p], -Y[i, q], B[p, q, 1, 1]]) rc2.add_clause([Y[i, p], -Y[i, q], B[p, q, 0, 1]]) rc2.add_clause([-Y[i, p], Y[i, q], B[p, q, 1, 0]]) for i in range(num_cells): for j in range(num_mutations): # 0->1 if alpha == 0: if I_mtr[i, j] == 0: rc2.add_clause([-Y[i, j]], weight=1) if I_mtr[i, j] == 1: rc2.add_clause([Y[i, j]]) # 0->1 and 1->0 if alpha > 0: if I_mtr[i, j] == 0: rc2.add_clause([Y[i, j], Z[i, j]]) rc2.add_clause([-Y[i, j], -Z[i, j]]) rc2.add_clause([Z[i, j]], weight=math.log((1 - alpha) / beta)) if I_mtr[i, j] == 1: rc2.add_clause([Y[i, j]], weight=math.log((1 - beta) / alpha)) s_time = time.time() variables = rc2.compute() e_time = time.time() running_time = e_time - s_time sol_Y = np.empty((num_cells, num_mutations), dtype=np.int8) numVar = 0 for i in range(num_cells): for j in range(num_mutations): sol_Y[i, j] = variables[numVar] > 0 numVar += 1 df_output = pd.DataFrame(sol_Y) df_output.columns = snvs df_output.index = cells df_output.index.name = "cellIDxmutID" if not experiment: scp.ul.stat(df_input, df_output, alpha, beta, running_time) return df_output
def phiscsi(df_input, alpha, beta, time_limit=86400, n_threads=1): """Solving using PhISCS-I (only in single-cell mode, no bulk). a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data :cite:`PhISCS`. Parameters ---------- df_input : :class:`pandas.DataFrame` Input genotype matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1), absence (0) and missing entires (3). alpha : :obj:`float` False positive error rate. beta : :obj:`float` False negative error rate. time_limit : :obj:`int`, optional Time limit of the Gurobi running in seconds, by default 86400 (one day) n_threads : :obj:`int`, optional Number of threads for Gurobi solver, by default 1 Returns ------- :class:`pandas.DataFrame` A conflict-free matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1) and absence (0). """ gp, gp_is_not_imported = scp.ul.import_gurobi() if gp_is_not_imported: scp.logg.error("Unable to import a package!") scp.logg.info( f"running PhISCS-I with alpha={alpha}, beta={beta}, time_limit={time_limit}, " f"n_threads={n_threads}" ) cells = list(df_input.index) snvs = list(df_input.columns) df_input = df_input.replace("?", 3) df_input = df_input.astype(int) I_mtr = df_input.values model = gp.Model("ILP") model.Params.OutputFlag = 0 model.Params.LogFile = "" model.Params.Threads = n_threads model.Params.TimeLimit = time_limit num_cells = len(cells) num_mutations = len(snvs) Y = {} B = {} for c in range(num_cells): for m in range(num_mutations): if alpha == 0: # 0->1 if I_mtr[c, m] == 0: Y[c, m] = model.addVar( vtype=gp.GRB.BINARY, obj=1, name=f"Y({c},{m})" ) elif I_mtr[c, m] == 1: Y[c, m] = 1 else: Y[c, m] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"Y({c},{m})" ) else: # 0->1 & 1->0 Y[c, m] = model.addVar(vtype=gp.GRB.BINARY, name=f"Y({c},{m})") for p in range(num_mutations): for q in range(p + 1, num_mutations): B[p, q, 1, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,1]" ) B[p, q, 1, 0] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,0]" ) B[p, q, 0, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},0,1]" ) for p in range(num_mutations): for q in range(p + 1, num_mutations): model.addConstr(B[p, q, 0, 1] + B[p, q, 1, 0] + B[p, q, 1, 1] <= 2) for i in range(num_cells): model.addConstr(Y[i, p] + Y[i, q] - B[p, q, 1, 1] <= 1) model.addConstr(-Y[i, p] + Y[i, q] - B[p, q, 0, 1] <= 0) model.addConstr(Y[i, p] - Y[i, q] - B[p, q, 1, 0] <= 0) if alpha == 0: # 0->1 model.Params.ModelSense = gp.GRB.MINIMIZE else: # 0->1 & 1->0 objective = 0 for j in range(num_mutations): for i in range(num_cells): if I_mtr[i, j] == 0: objective += ( np.log(1 - alpha) + np.log(beta / (1 - alpha)) * Y[i, j] ) if I_mtr[i, j] == 1: objective += np.log(alpha) + np.log((1 - beta) / alpha) * Y[i, j] model.setObjective(objective, gp.GRB.MAXIMIZE) s_time = time.time() model.optimize() e_time = time.time() running_time = e_time - s_time sol_Y = np.zeros((num_cells, num_mutations), dtype=np.int8) for i in range(num_cells): for j in range(num_mutations): if alpha == 0: if I_mtr[i, j] != 1: sol_Y[i, j] = Y[i, j].X > 0.5 else: sol_Y[i, j] = 1 else: sol_Y[i, j] = Y[i, j].X > 0.5 df_output = pd.DataFrame(sol_Y) df_output.columns = snvs df_output.index = cells df_output.index.name = "cellIDxmutID" scp.ul.stat(df_input, df_output, alpha, beta, running_time) return df_output def phiscsb_bulk( df_input, alpha, beta, kmax=0, vaf_info=None, delta=0.2, ): """Solving using PhISCS-B (in single-cell mode with bulk and mutation elimination). a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data :cite:`PhISCS`. Parameters ---------- df_input : :class:`pandas.DataFrame` Input genotype matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1), absence (0) and missing entires (3). alpha : :obj:`float` False positive error rate. beta : :obj:`float` False negative error rate. kmax : :obj:`int`, optional Max number of mutations to be eliminated, by default 0 vaf_info : :class:`pandas.DataFrame`, optional Information about the variant allele frequency in bulk data The size is n_SNVs x n_samples, by default None delta : :obj:`float`, optional Delta parameter accounting for VAF variance, by default 0.2 Returns ------- :class:`pandas.DataFrame` A conflict-free matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1) and absence (0). """ # TODO: implement scp.logg.info( f"running PhISCS-B-orig with alpha={alpha}, beta={beta}, kmax={kmax}, " f"vaf_info={vaf_info}, delta={delta}" ) cells = list(df_input.index) snvs = list(df_input.columns) df_input = df_input.replace("?", 3) df_input = df_input.astype(int) I_mtr = df_input.values rc2 = RC2(WCNF()) num_cells = len(cells) num_mutations = len(snvs) Y = np.empty((num_cells, num_mutations), dtype=np.int64) numVarY = 0 for i in range(num_cells): for j in range(num_mutations): numVarY += 1 Y[i, j] = numVarY B = np.empty((num_mutations, num_mutations, 2, 2), dtype=np.int64) numVarB = 0 for p in range(num_mutations): for q in range(p + 1, num_mutations): for i, j in [(0, 1), (1, 0), (1, 1)]: numVarB += 1 B[p, q, i, j] = numVarY + numVarB Z = np.empty((num_cells, num_mutations), dtype=np.int64) numVarZ = 0 for i in range(num_cells): for j in range(num_mutations): if I_mtr[i, j] == 0: numVarZ += 1 Z[i, j] = numVarY + numVarB + numVarZ numVarK = 0 if kmax > 0: K = np.empty(num_mutations + 1, dtype=np.int64) for j in range(num_mutations + 1): numVarK += 1 K[j] = numVarY + numVarB + numVarZ + numVarK if vaf_info is not None: A = np.empty((num_cells + 1, num_mutations + 1), dtype=np.int64) numVarA = 0 for i in range(num_cells + 1): for j in range(num_mutations + 1): numVarA += 1 A[i, j] = numVarY + numVarB + numVarZ + numVarK + numVarA for p in range(num_mutations): for q in range(p + 1, num_mutations): if kmax > 0: rc2.add_clause( [K[p], K[q], -B[p, q, 0, 1], -B[p, q, 1, 0], -B[p, q, 1, 1]] ) else: rc2.add_clause([-B[p, q, 0, 1], -B[p, q, 1, 0], -B[p, q, 1, 1]]) for i in range(num_cells): rc2.add_clause([-Y[i, p], -Y[i, q], B[p, q, 1, 1]]) rc2.add_clause([Y[i, p], -Y[i, q], B[p, q, 0, 1]]) rc2.add_clause([-Y[i, p], Y[i, q], B[p, q, 1, 0]]) for i in range(num_cells): for j in range(num_mutations): # 0->1 if alpha == 0: if I_mtr[i, j] == 0: rc2.add_clause([-Y[i, j]], weight=1) if I_mtr[i, j] == 1: rc2.add_clause([Y[i, j]]) # 0->1 and 1->0 if alpha > 0: if I_mtr[i, j] == 0: rc2.add_clause([Y[i, j], Z[i, j]]) rc2.add_clause([-Y[i, j], -Z[i, j]]) rc2.add_clause([Z[i, j]], weight=math.log((1 - alpha) / beta)) if I_mtr[i, j] == 1: rc2.add_clause([Y[i, j]], weight=math.log((1 - beta) / alpha)) if kmax > 0: for combo in combinations(range(num_mutations), kmax + 1): tmp = [] for com in combo: tmp.append(-K[com]) rc2.add_clause(tmp) for j in range(num_mutations): type(j) type(delta) s_time = time.time() variables = rc2.compute() e_time = time.time() running_time = e_time - s_time sol_Y = np.empty((num_cells, num_mutations), dtype=np.int8) numVar = 0 for i in range(num_cells): for j in range(num_mutations): sol_Y[i, j] = variables[numVar] > 0 numVar += 1 removedMutsIDs = [] if kmax > 0: sol_K = np.empty(num_mutations, dtype=np.int8) numVar = numVarY + numVarB + numVarZ for j in range(num_mutations): sol_K[j] = variables[numVar] > 0 numVar += 1 if sol_K[j]: removedMutsIDs.append(snvs[j]) df_output = pd.DataFrame(sol_Y) df_output.columns = snvs df_output.index = cells df_output.index.name = "cellIDxmutID" df_output[removedMutsIDs] = 0 scp.ul.stat(df_input, df_output, alpha, beta, running_time) return df_output
[docs]def phiscsi_bulk( df_input, alpha, beta, kmax=0, vaf_info=None, delta=0.2, time_limit=86400, n_threads=1, ): """Solving using PhISCS-I (in single-cell mode with bulk and mutation elimination). a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data :cite:`PhISCS`. Parameters ---------- df_input : :class:`pandas.DataFrame` Input genotype matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1), absence (0) and missing entires (3). alpha : :obj:`float` False positive error rate. beta : :obj:`float` False negative error rate. kmax : :obj:`int`, optional Max number of mutations to be eliminated, by default 0 vaf_info : :class:`pandas.DataFrame`, optional Information about the variant allele frequency in bulk data The size is n_SNVs x n_samples, by default None delta : :obj:`float`, optional Delta parameter accounting for VAF variance, by default 0.2 time_limit : :obj:`int`, optional Time limit of the Gurobi running in seconds, by default 86400 (one day) n_threads : :obj:`int`, optional Number of threads for Gurobi solver, by default 1 Returns ------- :class:`pandas.DataFrame` A conflict-free matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1) and absence (0). Examples -------- >>> adata = scp.datasets.acute_lymphocytic_leukemia2() >>> adata.var["VAF"] = ( 2 * adata.var["MutantCount"] / (adata.var["MutantCount"] + adata.var["ReferenceCount"]) ) >>> df_out = scp.tl.phiscsi_bulk( adata.to_df(), alpha=0.001, beta=0.181749, delta=0.2, kmax=3, vaf_info=adata.var[["VAF"]], ) """ gp, gp_is_not_imported = scp.ul.import_gurobi() if gp_is_not_imported: scp.logg.error("Unable to import a package!") scp.logg.info( f"running PhISCS-I-orig with alpha={alpha}, beta={beta}, kmax={kmax}, " f"vaf_info={vaf_info}, delta={delta}, time_limit={time_limit}, " f"n_threads={n_threads}" ) cells = list(df_input.index) snvs = list(df_input.columns) df_input = df_input.replace("?", 3) df_input = df_input.astype(int) I_mtr = df_input.values model = gp.Model("ILP") model.Params.OutputFlag = 0 model.Params.LogFile = "" model.Params.Threads = n_threads model.Params.TimeLimit = time_limit numCells = len(cells) numMutations = len(snvs) if vaf_info is not None: vaf_info = vaf_info.loc[snvs] sampleIDs = vaf_info.columns vaf_info.loc["NULL"] = vaf_info.shape[1] * [1] # Matrix Y is matrix of corrected (i.e. true) genotypes w.r.t. input SC matrix I Y = {} for c in range(numCells): for m in range(numMutations): Y[c, m] = model.addVar(vtype=gp.GRB.BINARY, name=f"Y({c},{m})") # Variables B control the existence of conflict between columns B = {} for p in range(numMutations + 1): for q in range(numMutations + 1): B[p, q, 1, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,1]" ) B[p, q, 1, 0] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,0]" ) B[p, q, 0, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},0,1]" ) K = {} for m in range(numMutations + 1): K[m] = model.addVar(vtype=gp.GRB.BINARY, name=f"K[{m}]") model.addConstr(K[numMutations] == 0) # null mutation can not be eliminated # A[p,q] = 1 if p is ancestor of q A = {} if vaf_info is not None: for p in range( numMutations + 1 ): # mutation with index numMutation is null mutation for q in range(numMutations + 1): A[p, q] = model.addVar(vtype=gp.GRB.BINARY, obj=0, name=f"A[{p},{q}]") model.update() # number of eliminated columns is upper bounded by user provided constant model.addConstr(gp.quicksum(K[m] for m in range(numMutations)) <= kmax) # Enforce three gametes rule for i in range(numCells): for p in range(numMutations): for q in range(numMutations): model.addConstr(Y[i, p] + Y[i, q] - B[p, q, 1, 1] <= 1) model.addConstr(-Y[i, p] + Y[i, q] - B[p, q, 0, 1] <= 0) model.addConstr(Y[i, p] - Y[i, q] - B[p, q, 1, 0] <= 0) # Null mutation present in each cell for p in range(numMutations + 1): model.addConstr(B[p, numMutations, 1, 0] == 0) # Forbid conflict between columns (three gametes rule) for p in range(numMutations): for q in range(numMutations): model.addConstr( B[p, q, 0, 1] + B[p, q, 1, 0] + B[p, q, 1, 1] <= 2 + K[p] + K[q] ) # Constraints for integrating VAF obtained from bulk data into the model if vaf_info is not None: for p in range(numMutations): for q in range(p + 1, numMutations): model.addConstr(A[p, q] + A[q, p] <= 1 - K[p]) model.addConstr(A[p, q] + A[q, p] <= 1 - K[q]) model.addConstr(A[p, q] + A[q, p] >= B[p, q, 1, 1] - K[p] - K[q]) for p in range(numMutations + 1): model.addConstr(A[p, p] == 0) for q in range(numMutations + 1): model.addConstr(A[p, q] <= 1 - K[p]) model.addConstr(A[p, q] <= 1 - K[q]) if p < q: model.addConstr(A[p, q] + A[q, p] <= 1) model.addConstr(A[p, q] + B[p, q, 0, 1] <= 1 + K[p] + K[q]) model.addConstr( B[p, q, 1, 0] + B[p, q, 1, 1] - A[p, q] <= 1 + K[p] + K[q] ) for sampleID in sampleIDs: VAF_p = float(vaf_info.iloc[p][sampleID]) VAF_q = float(vaf_info.iloc[q][sampleID]) model.addConstr(A[p, q] * VAF_p * (1 + delta) >= A[p, q] * VAF_q) for r in range(numMutations + 1): if r == q: continue VAF_r = float(vaf_info.iloc[r][sampleID]) # Constraint 2 model.addConstr( VAF_p * (1 + delta) >= VAF_q * (A[p, q] - A[r, q] - A[q, r]) + VAF_r * (A[p, r] - A[r, q] - A[q, r]) ) for r in range(numMutations + 1): if r == q: continue # Constraint 1.d model.addConstr(A[p, r] >= A[p, q] + A[q, r] - 1) candidateAncestors = list(range(numMutations + 1)) candidateAncestors.remove(p) if p < numMutations: model.addConstr( gp.quicksum(A[s, p] for s in candidateAncestors) >= 1 - K[p] ) elif p == numMutations: model.addConstr(gp.quicksum(A[s, p] for s in candidateAncestors) == 0) else: scp.logg.error("p index is out of range") # Defining the objective function objective = 0 for j in range(numMutations): numZeros = 0 numOnes = 0 for i in range(numCells): if I_mtr[i][j] == 0: numZeros += 1 objective += np.log(beta / (1 - alpha)) * Y[i, j] elif I_mtr[i][j] == 1: numOnes += 1 objective += np.log((1 - beta) / alpha) * Y[i, j] objective += numZeros * np.log(1 - alpha) objective += numOnes * np.log(alpha) objective -= K[j] * ( numZeros * np.log(1 - alpha) + numOnes * (np.log(alpha) + np.log((1 - beta) / alpha)) ) model.setObjective(objective, gp.GRB.MAXIMIZE) s_time = time.time() model.optimize() e_time = time.time() running_time = e_time - s_time removedMutsIDs = [] sol_K = [] for j in range(numMutations): sol_K.append(K[j].X > 0.5) if sol_K[j] == 1: removedMutsIDs.append(snvs[j]) sol_Y = np.zeros((numCells, numMutations), dtype=np.int8) for i in range(numCells): for j in range(numMutations): sol_Y[i, j] = Y[i, j].X > 0.5 df_output = pd.DataFrame(sol_Y) df_output.columns = snvs df_output.index = cells df_output.index.name = "cellIDxmutID" df_output[removedMutsIDs] = 0 # df_output.drop(removedMutsIDs, axis=1, inplace=True) scp.ul.stat(df_input, df_output, alpha, beta, running_time) return df_output
def phiscs_readcount(adata, alpha, beta, time_limit=86400, n_threads=1): """Solving using PhISCS-ReadCount (only in single-cell mode, no bulk). a combinatorial approach for subperfect tumor phylogeny reconstruction via integrative use of single-cell and bulk sequencing data :cite:`PhISCS`. Parameters ---------- adata : :class:`anndata.AnnData` Input data contains layers of mutant and total. alpha : :obj:`float` False positive error rate. beta : :obj:`float` False negative error rate. time_limit : :obj:`int`, optional Time limit of the Gurobi running in seconds, by default 86400 (one day) n_threads : :obj:`int`, optional Number of threads for Gurobi solver, by default 1 Returns ------- :class:`pandas.DataFrame` A conflict-free matrix in which rows are cells and columns are mutations. Values inside this matrix show the presence (1) and absence (0). """ gp, gp_is_not_imported = scp.ul.import_gurobi() if gp_is_not_imported: scp.logg.error("Unable to import a package!") scp.logg.info( f"running PhISCS-readcout with alpha={alpha}, beta={beta}," f" time_limit={time_limit}" ) PROB_SEQ_ERROR = 0.001 BETABINOM_ALPHA = 1.0 BETABINOM_BETA = 1.0 def _prob_absent(v, t): prob = binom.pmf(v, t, PROB_SEQ_ERROR) return prob def _prob_present(v, t): prob = pmf_BetaBinomial(v, t, BETABINOM_ALPHA, BETABINOM_BETA) return prob df_input = adata.to_df() cells = adata.obs_names muts = adata.var_names T = adata.layers["total"] V = adata.layers["mutant"] B_mu = np.zeros((len(cells), len(muts)), dtype=np.float64) B_delta = np.zeros((len(cells), len(muts)), dtype=np.float64) for i in range(len(cells)): for j in range(len(muts)): if T[i, j] != 0: B_mu[i, j] = _prob_absent(V[i, j], T[i, j]) B_delta[i, j] = _prob_present(V[i, j], T[i, j]) model = gp.Model("ILP") model.Params.OutputFlag = 0 model.Params.LogFile = "" model.Params.Threads = n_threads model.Params.TimeLimit = time_limit Y = {} B = {} for c in range(len(cells)): for m in range(len(muts)): Y[c, m] = model.addVar(vtype=gp.GRB.BINARY, name=f"Y({c},{m})") for p in range(len(muts)): for q in range(len(muts)): B[p, q, 1, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,1]" ) B[p, q, 1, 0] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},1,0]" ) B[p, q, 0, 1] = model.addVar( vtype=gp.GRB.BINARY, obj=0, name=f"B[{p},{q},0,1]" ) for p in range(len(muts)): for q in range(len(muts)): model.addConstr(B[p, q, 0, 1] + B[p, q, 1, 0] + B[p, q, 1, 1] <= 2) for i in range(len(cells)): model.addConstr(Y[i, p] + Y[i, q] - B[p, q, 1, 1] <= 1) model.addConstr(-Y[i, p] + Y[i, q] - B[p, q, 0, 1] <= 0) model.addConstr(Y[i, p] - Y[i, q] - B[p, q, 1, 0] <= 0) objective = 0 for j in range(len(muts)): for i in range(len(cells)): if T[i, j] != 0: objective += (1 - Y[i, j]) * np.log( B_mu[i, j] * (1 - alpha) + B_delta[i, j] * (alpha) ) objective += (Y[i, j]) * np.log( B_mu[i, j] * (beta) + B_delta[i, j] * (1 - beta) ) model.setObjective(objective, gp.GRB.MAXIMIZE) s_time = time.time() model.optimize() e_time = time.time() running_time = e_time - s_time sol_Y = np.zeros((len(cells), len(muts)), dtype=np.int8) for i in range(len(cells)): for j in range(len(muts)): sol_Y[i, j] = Y[i, j].X > 0.5 df_output = pd.DataFrame(sol_Y) df_output.columns = muts df_output.index = cells df_output.index.name = "cellIDxmutID" scp.ul.stat(df_input, df_output, alpha, beta, running_time) return df_output