"""pytransit.stat_tools: statistical utility functions (distributions, FDR corrections, resampling, and smoothing)."""

import math
from functools import reduce  # used by fact(); the builtin reduce was removed in Python 3

import numpy
import scipy.stats



def sample_trunc_norm_post(data, S, mu0, s20, k0, nu0):
    n = len(data)
    s2 = numpy.var(data, ddof=1)
    ybar = numpy.mean(data)
    kn = k0 + n
    nun = nu0 + n
    mun = (k0 * mu0 + n * ybar) / float(kn)
    s2n = (1.0 / nun) * (nu0 * s20 + (n - 1) * s2 + (k0 * n / float(kn)) * numpy.power(ybar - mu0, 2))
    s2_post = 1.0 / scipy.stats.gamma.rvs(nun / 2.0, scale=2.0 / (s2n * nun), size=S)

    # Truncated Normal, since counts can't be negative
    min_mu = 0
    max_mu = 1000000
    trunc_a = (min_mu - mun) / numpy.sqrt(s2_post / float(kn))
    trunc_b = (max_mu - mun) / numpy.sqrt(s2_post / float(kn))
    mu_post = scipy.stats.truncnorm.rvs(a=trunc_a, b=trunc_b, loc=mun,
                                        scale=numpy.sqrt(s2_post / float(kn)), size=S)
    return (mu_post, s2_post)
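
# Illustrative usage (added for documentation, not part of the original
# module): a minimal sketch of drawing posterior samples; the prior
# hyperparameters below are arbitrary placeholders.
def _demo_sample_trunc_norm_post():
    data = [45.0, 60.0, 52.0, 38.0, 41.0]
    mu_post, s2_post = sample_trunc_norm_post(data, S=2000, mu0=50.0, s20=100.0, k0=1.0, nu0=2.0)
    # Posterior point estimates from the samples:
    return numpy.mean(mu_post), numpy.mean(s2_post)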
#
def FWER_Bayes(X):
    ii = numpy.argsort(numpy.argsort(X))
    P_NULL = numpy.sort(X)
    W = 1 - P_NULL
    N = len(P_NULL)
    P_ALT = numpy.zeros(N)
    for i in range(N):
        P_ALT[i] = 1.0 - numpy.prod(W[:i + 1])
    return P_ALT[ii]
#
def bFDR(X):
    N = len(X)
    ii = numpy.argsort(numpy.argsort(X))
    P_NULL = numpy.sort(X)
    P_ALT = numpy.zeros(N)
    for i in range(N):
        P_ALT[i] = numpy.mean(P_NULL[:i + 1])
    return P_ALT[ii]
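
# Illustrative usage (added, not in the original module): both adjustments
# take a vector of posterior null probabilities and return adjusted values
# in the original input order.
def _demo_bayes_adjustments():
    p_null = numpy.array([0.01, 0.20, 0.03, 0.50])
    return FWER_Bayes(p_null), bFDR(p_null)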
#
def HDI_from_MCMC(posterior_samples, credible_mass=0.95):
    # Credit to 'user72564':
    # https://stackoverflow.com/questions/22284502/highest-posterior-density-region-and-central-credible-region
    # Computes the highest density interval from a sample of representative
    # values, estimated as the shortest credible interval.
    # Arguments: posterior_samples (samples from the posterior) and
    # credible_mass (normally 0.95).
    sorted_points = sorted(posterior_samples)
    ciIdxInc = numpy.ceil(credible_mass * len(sorted_points)).astype('int')
    nCIs = len(sorted_points) - ciIdxInc
    ciWidth = [0] * nCIs
    for i in range(0, nCIs):
        ciWidth[i] = sorted_points[i + ciIdxInc] - sorted_points[i]
    HDImin = sorted_points[ciWidth.index(min(ciWidth))]
    HDImax = sorted_points[ciWidth.index(min(ciWidth)) + ciIdxInc]
    return (HDImin, HDImax)
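
# Illustrative usage (added, not in the original module): the 95% HDI of
# standard-Normal samples should roughly bracket (-1.96, 1.96).
def _demo_hdi():
    samples = scipy.stats.norm.rvs(0, 1, size=10000)
    return HDI_from_MCMC(samples, credible_mass=0.95)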
#
def transformToRange(X, new_min, new_max, old_min=None, old_max=None):
    if old_min is None:
        old_min = min(X)
    if old_max is None:
        old_max = max(X)
    old_range = old_max - old_min
    new_range = new_max - new_min
    return [float(x - old_min) / old_range * new_range + new_min for x in X]
#
def fact(n):
    if n == 0:
        return 1
    else:
        return reduce(lambda x, y: x * y, range(1, n + 1))
#
def comb1(n, k):
    prod = 1
    for i in range(1, k + 1):
        prod = prod * (n - (k - i)) / float(i)
    return prod
#
def comb(n, k):
    if k < 0 or k > n:
        return 0
    if k > n - k:  # take advantage of symmetry
        k = n - k
    c = 1
    for i in range(k):
        c = c * (n - (k - (i + 1)))
        c = c // (i + 1)
    return c
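
# Quick sanity check (added, not in the original module): the integer-only
# implementation should agree with the factorial definition n!/(k!(n-k)!).
def _demo_comb():
    n, k = 10, 3
    assert comb(n, k) == fact(n) // (fact(k) * fact(n - k)) == 120
    return comb(n, k)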
#
def norm(x, mu, sigma):
    """Normal distribution"""
    sigma = float(sigma)
    return 1 / (sigma * math.sqrt(2 * math.pi)) * math.exp(-0.5 * math.pow((x - mu) / sigma, 2))
#
def binom(k, n, p):
    """Binomial distribution. Uses the Normal approximation for large 'n'."""
    if n >= 100:
        return norm(k, n * p, math.sqrt(n * p * (1 - p)))
    else:
        return comb(n, k) * math.pow(p, k) * math.pow(1 - p, n - k)
#
def binom_cdf(k, n, p):
    """CDF of the binomial distribution"""
    return sum([binom(i, n, p) for i in range(0, k + 1)])
#
def binom_test(k, n, p, type="two-sided"):
    """Performs a binomial test given successes, trials, and probability of success."""
    if type == "less":
        return binom_cdf(k, n, p)
    elif type == "greater":
        return 1 - binom_cdf(k - 1, n, p)
    else:
        if p == 0:
            return 1  # return(k == 0)
        elif p == 1:
            return 1  # return(k == n)
        else:
            relErr = 1 + 1e-7
            d = binom(k, n, p)
            m = n * p
            if k == m:
                return 1
            elif k < m:
                ri = range(int(math.ceil(m)), n + 1)
                y = sum([1 for j in ri if binom(j, n, p) <= d * relErr])
                return binom_cdf(k, n, p) + (1 - binom_cdf(int(n - y), n, p))
            else:
                ri = range(0, int(math.floor(m)))
                y = sum([1 for j in ri if binom(j, n, p) <= d * relErr])
                return binom_cdf(y - 1, n, p) + (1 - binom_cdf(k - 1, n, p))
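
# Illustrative usage (added, not in the original module): p-values for 14
# successes in 20 fair coin tosses, mirroring the disabled test case at
# the bottom of this file.
def _demo_binom_test():
    k, n, p = 14, 20, 0.5
    return (binom_test(k, n, p, "less"),
            binom_test(k, n, p, "greater"),
            binom_test(k, n, p, "two-sided"))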
##############################
# Bernoulli Diff Distribution
def dberndiff(d, peq, p01, p10):
    N = numpy.size(d)
    if N == 0:
        return 0.0
    if N == 1:
        if isinstance(d, tuple):
            d = d[0]
        if d == 0:
            return peq
        else:
            if d == -1:
                return p01
            if d == 1:
                return p10
            return 0.0
    else:
        d = numpy.array(d)
        result = numpy.zeros(N)
        result[d == -1] = p01
        result[d == 0] = peq
        result[d == 1] = p10
        return result
#
def qberndiff(d, peq, p01, p10):
    return numpy.sum([dberndiff(x, peq, p01, p10) for x in range(-1, d + 1)])
#############################
# Binomial Diff Distribution
def dbinomdiff(d, n, P):
    S = numpy.array(my_perm(d, n))
    return numpy.sum(multinomial(S, P))
#
def qbinomdiff(d, n, peq, p01, p10):
    # Fix: dbinomdiff expects a probability vector as its third argument.
    # The assumed ordering (p01, peq, p10) matches the (n_minus, n_zero,
    # n_plus) tuples produced by my_perm and the cases in dberndiff.
    P = numpy.array([p01, peq, p10])
    return numpy.sum([dbinomdiff(x, n, P) for x in range(-n, d + 1)])
#
def my_perm(d, n):
    S = []
    if d == 0:
        for i in range(n + 1):
            r = n - i
            if isEven(r):
                S.append((int(r / 2.0), i, int(r / 2.0)))
    #
    if d > 0:
        for i in range(d, n + 1):
            r = n - i
            if i == d:
                S.append((0, n - d, d))
            elif i > d:
                r = n - (i + (i - d))
                if 0 <= r <= n:
                    S.append((i - d, r, i))
    #
    if d < 0:
        for i in range(abs(d), n + 1):
            r = n - i
            if i == abs(d):
                S.append((-d, n + d, 0))
            elif i > d:
                r = n - (i + (i + d))
                if 0 <= r <= n:
                    S.append((i, r, i + d))
    #
    return S
#
def multinomial(K, P):
    N = numpy.sum(K, 1)
    if K.shape == P.shape:
        return tricoeff(N, K) * numpy.prod([numpy.power(P[i], K[i]) for i in range(len(K))], 1)
    else:
        return tricoeff(N, K) * numpy.prod([numpy.power(P, K[i]) for i in range(len(K))], 1)
#
def log_fac(n):
    return numpy.sum(numpy.log(numpy.arange(2, n + 1)))
#
def tricoeff(N, S):
    # Cache the log-factorial table on the function object. (The original
    # try/except NameError idiom made LOG_FAC a local name, so the table
    # was rebuilt on every call and never actually cached.)
    max_n = int(numpy.max(N))
    LOG_FAC = getattr(tricoeff, "_LOG_FAC", None)
    if LOG_FAC is None or len(LOG_FAC) <= max_n:
        LOG_FAC = numpy.array([log_fac(i) for i in range(max_n + 1)])
        tricoeff._LOG_FAC = LOG_FAC
    return numpy.exp(LOG_FAC[N] - (LOG_FAC[S[:, 0]] + LOG_FAC[S[:, 1]] + LOG_FAC[S[:, 2]]))
#
def isEven(x):
    return x % 2 == 0
#
def regress(X, Y):
    """Performs linear regression given two vectors, X, Y."""
    N = len(X)
    xbar = numpy.average(X)
    ybar = numpy.average(Y)
    xybar = numpy.average([X[i] * Y[i] for i in range(N)])
    x2bar = numpy.average([X[i] * X[i] for i in range(N)])
    B = (xybar - xbar * ybar) / (x2bar - xbar * xbar)
    A0 = ybar - B * xbar

    yfit = [A0 + B * X[i] for i in range(N)]
    yres = [Y[i] - (A0 + B * X[i]) for i in range(N)]
    var = sum([math.pow(yres[i], 2) for i in range(N)]) / (N - 2)
    std = math.sqrt(var)

    return (B, A0, std)
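
# Quick sanity check (added, not in the original module): noiseless data
# from y = 2x + 1 should recover slope ~2, intercept ~1, and residual
# standard deviation ~0.
def _demo_regress():
    X = [1.0, 2.0, 3.0, 4.0, 5.0]
    Y = [2.0 * x + 1.0 for x in X]
    B, A0, std = regress(X, Y)
    return B, A0, std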
#
def boxcoxtransform(x, lambdax):
    """Performs a Box-Cox transformation on a value x.

    WARNING: x must be positive!
    """
    if x <= 0:
        raise ValueError("Nonpositive value(s) in X vector")
    if abs(lambdax) < 1.0e-5:
        return math.log(x)
    else:
        return (x ** lambdax - 1.0) / lambdax
#
def loglik(X, lambdax):
    """Computes the Box-Cox log-likelihood of the data vector X for a given lambda."""
    n = len(X)
    Xtrans = [boxcoxtransform(x, lambdax) for x in X]
    meanX = sum(Xtrans) / float(n)
    S2 = (lambdax - 1.0) * sum([math.log(x) for x in X])
    S = sum([(x - meanX) ** 2 for x in Xtrans])
    S1 = (-n / 2.0) * math.log(S / n)
    return S2 + S1
#
def boxcoxTable(X, minlambda, maxlambda, dellambda):
    """Returns a table of (loglik, lambda) pairs for the data."""
    # Create a table (lambda, loglik)
    out = []
    vallambda = minlambda
    while vallambda <= maxlambda + 1.0e-5:
        llik = loglik(X, vallambda)
        out.append((llik, vallambda))
        vallambda += dellambda
    return out
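
# Illustrative usage (added, not in the original module): scan lambdas and
# keep the one maximizing the log-likelihood; since the table stores
# (loglik, lambda) pairs, max() picks the best entry directly.
def _demo_boxcox_lambda():
    X = [1.0, 2.0, 4.0, 8.0, 16.0]  # strictly positive, as required
    table = boxcoxTable(X, minlambda=-2.0, maxlambda=2.0, dellambda=0.1)
    best_llik, best_lambda = max(table)
    return best_lambda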
#
def phi_coefficient(X, Y):
    """Calculates the phi coefficient for two boolean arrays"""
    N = len(X)
    assert len(X) == len(Y), "Length of arrays must be equal"
    x1y1 = sum([int(X[j]) == int(Y[j]) == 1 for j in range(N)])
    x1y0 = sum([int(X[j]) == 1 and int(Y[j]) == 0 for j in range(N)])
    x0y1 = sum([int(X[j]) == 0 and int(Y[j]) == 1 for j in range(N)])
    x0y0 = sum([int(X[j]) == int(Y[j]) == 0 for j in range(N)])
    x1 = x1y1 + x1y0
    x0 = x0y1 + x0y0
    y1 = x1y1 + x0y1
    y0 = x1y0 + x0y0
    phi_coeff = (x1y1 * x0y0 - x1y0 * x0y1) / math.sqrt(x1 * x0 * y1 * y0)
    return phi_coeff
#
def BH_fdr_correction(X):
    """Adjusts p-values using the Benjamini-Hochberg procedure"""
    n = len(X)
    qvalues = numpy.zeros(n)
    pvalues = numpy.array(X)
    pvalues.sort()
    pvalues = pvalues[::-1]

    for i in range(n):
        rank = n - i
        qvalues[i] = n / float(rank) * pvalues[i]

    for i in range(0, n - 1):
        if qvalues[i] < qvalues[i + 1]:
            qvalues[i + 1] = qvalues[i]

    p2qval = dict([(p, q) for (p, q) in zip(pvalues, qvalues)])
    return numpy.array([p2qval[p] for p in X])
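
# Illustrative usage (added, not in the original module): q-values come
# back in the same order as the input p-values; note that, as in the
# original implementation, they are not clipped at 1.
def _demo_bh():
    pvals = [0.001, 0.04, 0.03, 0.8]
    return BH_fdr_correction(pvals)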
#
def bayesian_ess_thresholds(Z_raw, ALPHA=0.05):
    """Returns Essentiality Thresholds using a BH-like procedure"""
    Z = numpy.sort(Z_raw)[::-1]
    W = 1 - Z
    N = len(Z)

    ess_threshold = 1.00
    INDEX = list(range(3, N + 1))
    count = 0
    for i in INDEX:
        count += 1
        wi = 1 - Z[i - 1]
        ai_n = (ALPHA * i) / N
        mean_wi = numpy.average(W[0:i - 2])
        delta_w = wi - mean_wi
        if delta_w > ai_n:
            ess_threshold = Z[i - 1]
            break

    noness_threshold = 0.00
    count = 0
    INDEX = list(range(0, N + 1))
    INDEX.sort(reverse=True)
    for i in INDEX:
        wi = Z[N - i + 1]
        ai_n = (ALPHA * i) / N
        mean_wi = numpy.average(Z[N - i + 1:])
        delta_w = Z[N - i + 1] - mean_wi
        count += 1
        if ai_n > delta_w:
            break
        noness_threshold = Z[N - i]

    return (ess_threshold, noness_threshold)
#
def tricube(X):
    """Tricube kernel: (1 - |x|^3)^3 on [-1, 1], zero elsewhere."""
    result = numpy.zeros(len(X))
    ii = numpy.logical_and(X >= -1, X <= 1)
    result[ii] = numpy.power(1 - numpy.power(numpy.abs(X[ii]), 3), 3)
    return result
#
def loess(X, Y, h=10000):
    """Smooths Y over X by locally weighted regression with tricube weights and bandwidth h."""
    smoothed = numpy.zeros(len(Y))
    for i, x in enumerate(X):
        W = tricube((X - x) / float(h))
        sW = numpy.sum(W)
        wsX = numpy.sum(W * X)
        wsY = numpy.sum(W * Y)
        wsXY = numpy.sum(W * X * Y)
        # Note: this sum is unweighted in the original code; a standard
        # weighted least-squares fit would use numpy.sum(W * X * X) here.
        sXX = numpy.sum(X * X)
        B = (sW * wsXY - wsX * wsY) / (sW * sXX - numpy.power(wsX, 2))
        A = (wsY - B * wsX) / sW
        smoothed[i] = B * x + A
    return smoothed
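
# Illustrative usage (added, not in the original module): smooth noisy
# counts over positions; the bandwidth h is in the same units as X.
def _demo_loess():
    X = numpy.arange(0, 50000, 500, dtype=float)
    Y = 100 + 10 * numpy.sin(X / 5000.0) + scipy.stats.norm.rvs(0, 2, size=len(X))
    return loess(X, Y, h=10000)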
#
def loess_correction(X, Y, h=10000, window=100):
    """Corrects Y for positional trends by scaling each window by its LOESS fit."""
    Y = numpy.array(Y)
    size = int(len(X) / window) + 1
    x_w = numpy.zeros(size)
    y_w = numpy.zeros(size)
    for i in range(size):
        x_w[i] = window * i
        y_w[i] = sum(Y[window * i:window * (i + 1)])

    ysmooth = loess(x_w, y_w, h)
    mline = numpy.mean(y_w)
    # (A no-op statement, "y_w * (ysmooth/mline)", was removed here; its
    # result was discarded in the original code.)
    normalized_Y = numpy.zeros(len(Y))
    for i in range(size):
        normalized_Y[window * i:window * (i + 1)] = Y[window * i:window * (i + 1)] * (ysmooth[i] / mline)

    return normalized_Y
#
def F_mean_diff_flat(*args, **kwargs):
    A = args[0]
    B = args[1]
    return numpy.mean(B) - numpy.mean(A)
#
def F_sum_diff_flat(*args, **kwargs):
    A = args[0]
    B = args[1]
    return numpy.sum(B) - numpy.sum(A)
#
def F_mean_diff_dict(*args, **kwargs):
    D = args[0]
    data1_total = 0
    data2_total = 0
    data1_size = 0
    data2_size = 0
    for L in D:
        data1_total += numpy.sum(D[L][0])
        data1_size += len(D[L][0])
        data2_total += numpy.sum(D[L][1])
        data2_size += len(D[L][1])
    return (data2_total / float(data2_size)) - (data1_total / float(data1_size))
#
def F_sum_diff_dict(*args, **kwargs):
    D = args[0]
    data1_total = 0
    data2_total = 0
    for L in D:
        data1_total += numpy.sum(D[L][0])
        data2_total += numpy.sum(D[L][1])
    return data2_total - data1_total
#
def F_shuffle_flat(*args, **kwargs):
    X = args[0]
    return numpy.random.permutation(X)
#
def F_shuffle_dict_libraries(*args, **kwargs):
    D = args[0]
    E = {}
    for L in D:
        n1 = len(D[L][0])
        combined = numpy.append(D[L][0], D[L][1])
        perm = numpy.random.permutation(combined)
        E[L] = numpy.array([perm[:n1], perm[n1:]])
    return E
#
def resampling(data1, data2, S=10000, testFunc=F_mean_diff_flat,
               permFunc=F_shuffle_flat, adaptive=False, lib_str1="", lib_str2=""):
    """Performs a permutation test on two sets of data.

    Performs the resampling / permutation test given two sets of data using a
    function defining the test statistic and a function defining how to
    permute the data.

    Args:
        data1: List or numpy array with the first set of observations.
        data2: List or numpy array with the second set of observations.
        S: Number of permutation tests (or samples) to obtain.
        testFunc: Function defining the desired test statistic. Should accept
            two lists as arguments. Default is difference in means between
            the observations.
        permFunc: Function defining the way to permute the data. Should accept
            one argument, the combined set of data. Default is random shuffle.
        adaptive: Cuts off resampling early depending on significance.

    Returns:
        Tuple with described values
            - test_obs -- Test statistic of observation.
            - mean1 -- Arithmetic mean of first set of data.
            - mean2 -- Arithmetic mean of second set of data.
            - log2FC -- Normalized log2FC of the means.
            - pval_ltail -- Lower tail p-value.
            - pval_utail -- Upper tail p-value.
            - pval_2tail -- Two-tailed p-value.
            - test_sample -- List of samples of the test statistic.

    :Example:
        >>> import pytransit.stat_tools as stat_tools
        >>> import numpy
        >>> X = numpy.random.random(100)
        >>> Y = numpy.random.random(100)
        >>> (test_obs, mean1, mean2, log2fc, pval_ltail, pval_utail, pval_2tail, test_sample) = stat_tools.resampling(X,Y)
        >>> pval_2tail
        0.2167
        >>> test_sample[:3]
        [0.076213992904990535, -0.0052513291091412784, -0.0038425140184765172]
    """
    # Do basic sanity checks:
    # - Check library strings match in some way
    lib_diff = set(lib_str1) ^ set(lib_str2)
    if lib_diff:
        raise ValueError("At least one library string has a letter not used by the other: %s" % ", ".join(lib_diff))

    # - Check input has some data
    assert len(data1) > 0, "Data1 cannot be empty"
    assert len(data2) > 0, "Data2 cannot be empty"

    # Calculate basic statistics for the input data:
    n1 = len(data1)
    n2 = len(data2)

    mean1 = 0
    if n1 > 0:
        mean1 = numpy.mean(data1)
    mean2 = 0
    if n2 > 0:
        mean2 = numpy.mean(data2)

    try:
        # Only adjust log2FC if one of the means is zero
        if mean1 > 0 and mean2 > 0:
            log2FC = math.log((mean2) / (mean1), 2)
        else:
            log2FC = math.log((mean2 + 1.0) / (mean1 + 1.0), 2)
    except:
        log2FC = 0.0

    # Get stats and info based on whether working with libraries or not:
    nTAs = 0
    if lib_str1:
        # Get number of TA sites implied
        nTAs = len(data1.flatten()) // len(lib_str1)
        assert len(data2.flatten()) // len(lib_str2) == nTAs, "Datasets do not have matching sites; check input data and library strings."
        # Get data
        perm = get_lib_data_dict(data1, lib_str1, data2, lib_str2, nTAs)
        test_obs = testFunc(perm)
    else:
        try:
            test_obs = testFunc(data1, data2)
        except Exception as e:
            print("")
            print("!" * 100)
            print("Error: Could not apply test function to input data!")
            print("data1", data1)
            print("data2", data2)
            print("")
            print("\t%s" % e)
            print("!" * 100)
            print("")
            return None
        perm = numpy.zeros(n1 + n2)
        perm[:n1] = data1
        perm[n1:] = data2

    count_ltail = 0
    count_utail = 0
    count_2tail = 0
    test_list = []
    s_performed = 0
    for s in range(S):
        if len(perm) > 0:
            perm = permFunc(perm)
            if not lib_str1:
                test_sample = testFunc(perm[:n1], perm[n1:])
            else:
                test_sample = testFunc(perm)
        else:
            test_sample = 0
        test_list.append(test_sample)
        if test_sample <= test_obs:
            count_ltail += 1
        if test_sample >= test_obs:
            count_utail += 1
        if abs(test_sample) >= abs(test_obs):
            count_2tail += 1

        s_performed += 1
        if adaptive:
            if s_performed == round(S * 0.01) or s_performed == round(S * 0.1) or s_performed == round(S * 1):
                if count_2tail >= round(S * 0.01 * 0.10):
                    break

    pval_ltail = count_ltail / float(s_performed)
    pval_utail = count_utail / float(s_performed)
    pval_2tail = count_2tail / float(s_performed)

    return (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, test_list)
#
def cumulative_average(new_x, n, prev_avg):
    return ((new_x + (n * prev_avg)) / (n + 1.0), n + 1)
#
def text_histogram(X, nBins=20, resolution=200, obs=None):
    MIN = numpy.min(X)
    MAX = numpy.max(X)
    bin_list = numpy.linspace(MIN, MAX, nBins)
    hit_flag = "->"
    empty_flag = " "
    # Pair each bin edge with the next one. (The original sliced
    # bin_list[:-2] here, which skipped the last interior bin.)
    for b_l, b_u in zip(bin_list[:-1], bin_list[1:]):
        Z = numpy.logical_and(b_l <= X, X < b_u)
        density = numpy.mean(Z)
        if obs is not None and (b_l <= obs < b_u):
            flag = hit_flag
        else:
            flag = empty_flag
        print("%-12f\t%s|%s" % (b_l, flag, "#" * int(resolution * density)))
    Z = numpy.logical_and(bin_list[-1] <= X, X < float("inf"))
    density = numpy.mean(Z)
    if obs is not None and (bin_list[-1] <= obs < float("inf")):
        flag = hit_flag
    else:
        flag = empty_flag
    print("%-12f\t%s|%s" % (bin_list[-1], flag, "#" * int(resolution * density)))
def parse_lib_index(nData, libstr, nTAs):
    full_index = numpy.arange(nData)
    lib_to_index = {}
    for k, L in enumerate(libstr):
        if L not in lib_to_index:
            lib_to_index[L] = []
        lib_to_index[L] += list(full_index[k * nTAs:((k + 1) * nTAs)])
    for L, index in lib_to_index.items():
        lib_to_index[L] = numpy.array(index)
    return lib_to_index
#
def combine_lib_dicts(L1, L2):
    KEYS = L1.keys()
    DATA = {}
    for K in KEYS:
        DATA[K] = numpy.array([L1[K], L2[K]])
    return DATA
#
def get_lib_data_dict(data1, ctrl_lib_str, data2, exp_lib_str, nTAs):
    lib1_index_dict = parse_lib_index(len(data1), ctrl_lib_str, nTAs)
    lib2_index_dict = parse_lib_index(len(data2), exp_lib_str, nTAs)
    lib1_data_dict = dict([(L, data1[lib1_index_dict[L]]) for L in sorted(lib1_index_dict)])
    lib2_data_dict = dict([(L, data2[lib2_index_dict[L]]) for L in sorted(lib2_index_dict)])
    data_dict = combine_lib_dicts(lib1_data_dict, lib2_data_dict)
    return data_dict
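
# Illustrative usage (added, not in the original module): two datasets,
# each concatenating libraries "A" and "B" with 3 TA sites per library,
# are regrouped into a dict keyed by library letter, with control and
# experimental rows stacked per key.
def _demo_lib_data_dict():
    data1 = numpy.arange(6, dtype=float)         # control: libraries "AB"
    data2 = numpy.arange(6, dtype=float) + 100   # experimental: libraries "AB"
    return get_lib_data_dict(data1, "AB", data2, "AB", nTAs=3)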
# TEST-CASES
if __name__ == "__main__":
    """
    n = 20
    p = 0.5
    k = 14
    print("")
    print("#########################################")
    print("############ BINOM TEST #################")
    print("#########################################")
    print("Coin Tosses: %d" % n)
    print("Success Prob: %3.2f" % p)
    print("Observed: %d" % k)
    print("")
    print("Left-Tail Test:")
    print("%d tosses, p-value = %f" % (k, binom_test(k, n, p, "less")))
    print("")
    print("Right-Tail Test:")
    print("%d tosses, p-value = %f" % (k, binom_test(k, n, p, "greater")))
    print("")
    print("Two-Sided Test:")
    print("%d tosses, p-value = %f" % (k, binom_test(k, n, p, "two-sided")))
    print("")
    print("")
    print("#########################################")
    print("############ RESAMPLING #################")
    print("#########################################")
    data1 = scipy.stats.norm.rvs(100, 10, size=1000)
    data2 = scipy.stats.norm.rvs(105, 10, size=1000)
    (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, test_list) = resampling(data1, data2, S=10000)
    print("Data1:")
    text_histogram(data1, nBins=20)
    print("")
    print("Data2:")
    text_histogram(data2, nBins=20)
    print("")
    print("Results:", (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail))
    print("")
    print("Resampling Histogram:")
    text_histogram(test_list, nBins=20, obs=test_obs)
    """

    ## TEST
    import pytransit.transit_tools as transit_tools
    import pytransit.tnseq_tools as tnseq_tools
    import pytransit.norm_tools as norm_tools
    import sys

    ctrldata = ["/pacific/home/mdejesus/transit/tests/GI/H37Rv_day0_rep1.wig",
                "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day0_rep1.wig",
                "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day0_rep2.wig",
                "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day0_rep2.wig"]
    expdata = ["/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep1.wig",
               "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep2.wig",
               "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep3.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep1.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep2.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep3.wig"]
    annotation = "/pacific/home/mdejesus/transit/tests/GI/H37Rv.prot_table"

    i = 202
    if len(sys.argv) > 1:
        i = int(sys.argv[1])
    DO_LIB = True
    if len(sys.argv) > 2:
        DO_LIB = bool(int(sys.argv[2]))

    if DO_LIB:
        ctrl_lib_str = "ABAB"
        exp_lib_str = "AAABBB"
    else:
        ctrl_lib_str = ""
        exp_lib_str = ""

    Kctrl = len(ctrldata)
    Kexp = len(expdata)
    (data, position) = transit_tools.get_validated_data(ctrldata + expdata)
    (K, N) = data.shape
    (data, factors) = norm_tools.normalize_data(data, "TTR", ctrldata + expdata, annotation)

    G = tnseq_tools.Genes(ctrldata + expdata, annotation, data=data, position=position)
    gene = G[i]
    print("\n\n")
    print("#" * 100)
    print("# (%s) NEW TEST: %s" % (DO_LIB, gene))
    print("#" * 100)
    print("")
    ii = numpy.ones(gene.n) == 1
    data1 = gene.reads[:Kctrl, ii].flatten()
    data2 = gene.reads[Kctrl:, ii].flatten()
    data_dict = get_lib_data_dict(data1, ctrl_lib_str, data2, exp_lib_str, gene.n)

    if DO_LIB:
        (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, testlist) = resampling(
            data1, data2, S=10000, testFunc=F_mean_diff_dict,
            permFunc=F_shuffle_dict_libraries, adaptive=False,
            lib_str1=ctrl_lib_str, lib_str2=exp_lib_str)
    else:
        (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, testlist) = resampling(
            data1, data2, S=10000, testFunc=F_mean_diff_flat,
            permFunc=F_shuffle_flat, adaptive=False,
            lib_str1=ctrl_lib_str, lib_str2=exp_lib_str)

    print("Resampling Histogram:")
    text_histogram(testlist, nBins=20, obs=test_obs)