import math
import numpy
import scipy.stats
def sample_trunc_norm_post(data, S, mu0, s20, k0, nu0):
    """Draws S samples of (mu, sigma^2) from the posterior of a Normal model
    with a conjugate Normal/scaled-inverse-chi-squared prior, truncating the
    posterior of the mean at zero since counts cannot be negative."""
    n = len(data)
    s2 = numpy.var(data, ddof=1)
    ybar = numpy.mean(data)
    kn = k0 + n
    nun = nu0 + n
    mun = (k0*mu0 + n*ybar)/float(kn)
    s2n = (1.0/nun) * (nu0*s20 + (n-1)*s2 + (k0*n/float(kn))*numpy.power(ybar-mu0, 2))
    # Sigma^2 posterior is scaled-inverse-chi-squared(nun, s2n), sampled by
    # inverting a Gamma(nun/2, scale=2/(s2n*nun)) draw
    s2_post = 1.0/scipy.stats.gamma.rvs(nun/2.0, scale=2.0/(s2n*nun), size=S)
    # Truncated Normal since counts can't be negative
    min_mu = 0
    max_mu = 1000000
    trunc_a = (min_mu-mun)/numpy.sqrt(s2_post/float(kn))
    trunc_b = (max_mu-mun)/numpy.sqrt(s2_post/float(kn))
    mu_post = scipy.stats.truncnorm.rvs(a=trunc_a, b=trunc_b, loc=mun, scale=numpy.sqrt(s2_post/float(kn)), size=S)
    return (mu_post, s2_post)
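# Example usage (illustrative sketch; the hyperparameter values below are
# arbitrary choices, not defaults of this module):
#   >>> counts = [52, 38, 61, 45, 49]
#   >>> mu_post, s2_post = sample_trunc_norm_post(counts, S=1000, mu0=50.0, s20=100.0, k0=1.0, nu0=2.0)
#   >>> bool(numpy.all(mu_post >= 0))   # the mean is truncated at zero
#   True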
#
def FWER_Bayes(X):
    """Given posterior probabilities of the null hypothesis, returns for each
    item the probability that at least one item with an equal or smaller null
    probability is truly null (a Bayesian analog of the family-wise error
    rate, assuming independence)."""
    ii = numpy.argsort(numpy.argsort(X))
    P_NULL = numpy.sort(X)
    W = 1 - P_NULL
    N = len(P_NULL)
    P_ALT = numpy.zeros(N)
    for i in range(N):
        P_ALT[i] = 1.0 - numpy.prod(W[:i+1])
    return P_ALT[ii]
#
def bFDR(X):
    """Given posterior probabilities of the null hypothesis, returns for each
    item the mean null probability among all items with an equal or smaller
    null probability (a Bayesian analog of the false discovery rate)."""
    N = len(X)
    ii = numpy.argsort(numpy.argsort(X))
    P_NULL = numpy.sort(X)
    P_ALT = numpy.zeros(N)
    for i in range(N):
        P_ALT[i] = numpy.mean(P_NULL[:i+1])
    return P_ALT[ii]
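# Example (worked by hand for three posterior null probabilities):
#   >>> probs = numpy.array([0.01, 0.20, 0.05])
#   >>> FWER_Bayes(probs)   # 1 - cumulative products of (1 - sorted probs)
#   array([ 0.01  ,  0.2476,  0.0595])
#   >>> bFDR(probs)         # running means of the sorted probs
#   array([ 0.01      ,  0.08666667,  0.03      ])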
#
def HDI_from_MCMC(posterior_samples, credible_mass=0.95):
    """Computes the highest density interval (HDI) from a sample of
    representative values, estimated as the shortest credible interval.
    Takes posterior_samples (samples from the posterior) and credible_mass
    (normally 0.95).
    Credit to 'user72564':
    https://stackoverflow.com/questions/22284502/highest-posterior-density-region-and-central-credible-region
    """
    sorted_points = sorted(posterior_samples)
    ciIdxInc = int(numpy.ceil(credible_mass * len(sorted_points)))
    nCIs = len(sorted_points) - ciIdxInc
    ciWidth = [0]*nCIs
    for i in range(0, nCIs):
        ciWidth[i] = sorted_points[i + ciIdxInc] - sorted_points[i]
    HDImin = sorted_points[ciWidth.index(min(ciWidth))]
    HDImax = sorted_points[ciWidth.index(min(ciWidth)) + ciIdxInc]
    return (HDImin, HDImax)
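# Example (illustrative): for a symmetric, unimodal posterior the 95% HDI
# should roughly match the central interval, here about (-1.96, 1.96):
#   >>> samples = scipy.stats.norm.rvs(loc=0, scale=1, size=100000)
#   >>> lo, hi = HDI_from_MCMC(samples, credible_mass=0.95)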
#
#
def fact(n):
    if n == 0: return 1
    else: return reduce(lambda x, y: x*y, range(1, n+1))
#
def comb1(n, k):
    prod = 1
    for i in range(1, k+1):
        prod = prod * (n - (k-i))/float(i)
    return prod
#
def comb(n, k):
    if k < 0 or k > n:
        return 0
    if k > n - k:  # take advantage of symmetry
        k = n - k
    c = 1
    for i in range(k):
        c = c * (n - (k - (i+1)))
        c = c // (i+1)
    return c
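# Example: exact integer binomial coefficients, e.g. C(5, 2):
#   >>> comb(5, 2)
#   10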
#
def norm(x, mu, sigma):
    """Normal distribution"""
    sigma = float(sigma)
    return 1/(sigma*(math.sqrt(2*math.pi))) * math.exp(-0.5 * math.pow((x-mu)/sigma, 2))
#
def binom(k, n, p):
    """Binomial distribution. Uses the Normal approximation for large 'n'."""
    if n >= 100:
        return norm(k, n*p, math.sqrt(n*p*(1-p)))
    else:
        return comb(n, k) * math.pow(p, k) * math.pow(1-p, n-k)
#
def binom_cdf(k, n, p):
    """CDF of the binomial distribution"""
    return sum([binom(i, n, p) for i in range(0, k+1)])
#
def binom_test(k, n, p, type="two-sided"):
    """Performs a binomial test given successes (k), trials (n), and success
    probability (p). 'type' is one of "less", "greater", or "two-sided"; the
    two-sided p-value follows R's binom.test, summing the probabilities of
    all outcomes no more likely than the observed one."""
    if type == "less": return binom_cdf(k, n, p)
    elif type == "greater": return 1 - binom_cdf(k-1, n, p)
    else:
        if p == 0: return 1  # return(k == 0)
        elif p == 1: return 1  # return(k == n)
        else:
            relErr = 1 + 1e-7
            d = binom(k, n, p)
            m = n * p
            if k == m: return 1
            elif k < m:
                ri = range(int(math.ceil(m)), n+1)
                y = sum([1 for j in ri if binom(j, n, p) <= d*relErr])
                return binom_cdf(k, n, p) + (1 - binom_cdf(int(n-y), n, p))
            else:
                ri = range(0, int(math.floor(m)))
                y = sum([1 for j in ri if binom(j, n, p) <= d*relErr])
                return binom_cdf(y-1, n, p) + (1 - binom_cdf(k-1, n, p))
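# Example: 14 heads in 20 fair coin tosses; the two-sided p-value should be
# approximately 0.1153, matching R's binom.test(14, 20, 0.5):
#   >>> round(binom_test(14, 20, 0.5, "two-sided"), 4)
#   0.1153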
##############################
# Bernoulli Diff Distribution
def dberndiff(d, peq, p01, p10):
    """Density of the difference of two Bernoulli variables, which takes
    values in {-1, 0, 1} with probabilities p01, peq, and p10."""
    N = numpy.size(d)
    if N == 0:
        return 0.0
    if N == 1:
        if type(d) == type(()):
            d = d[0]
        if d == 0:
            return peq
        elif d == -1:
            return p01
        elif d == 1:
            return p10
        return 0.0
    else:
        d = numpy.array(d)
        result = numpy.zeros(N)
        result[d == -1] = p01
        result[d == 0] = peq
        result[d == 1] = p10
        return result
#
def qberndiff(d, peq, p01, p10):
    """CDF of the Bernoulli difference distribution, P(D <= d)."""
    return numpy.sum([dberndiff(x, peq, p01, p10) for x in range(-1, d + 1)])
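# Example: the point masses over {-1, 0, 1} must sum to one, so the CDF
# evaluated at d=1 is exactly 1.0:
#   >>> qberndiff(1, 0.5, 0.2, 0.3)
#   1.0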
#############################
# Binomial Diff Distribution
def dbinomdiff(d, n, P):
    """Density of the difference of two binomial variables over n trials.
    P holds the per-trial probabilities as a single array, ordered to match
    the (n_neg, n_zero, n_pos) count tuples produced by my_perm, i.e.
    (p01, peq, p10)."""
    S = numpy.array(my_perm(d, n))
    return numpy.sum(multinomial(S, P))
#
def qbinomdiff(d, n, peq, p01, p10):
    """CDF of the binomial difference distribution, P(D <= d)."""
    # dbinomdiff takes the three probabilities bundled as one array ordered
    # (p01, peq, p10); the original passed them as separate arguments, which
    # did not match dbinomdiff's signature.
    P = numpy.array([p01, peq, p10])
    return numpy.sum([dbinomdiff(x, n, P) for x in range(-n, d + 1)])
#
def my_perm(d, n):
    """Enumerates the (n_neg, n_zero, n_pos) count triples over n trials
    whose net difference (n_pos - n_neg) equals d."""
    S = []
    if d == 0:
        for i in range(n + 1):
            r = n - i
            if isEven(r):
                S.append((int(r / 2.0), i, int(r / 2.0)))
    if d > 0:
        for i in range(d, n + 1):
            r = n - i
            if i == d:
                S.append((0, n - d, d))
            elif i > d:
                r = n - (i + (i - d))
                if 0 <= r <= n:
                    S.append((i - d, r, i))
    if d < 0:
        for i in range(abs(d), n + 1):
            r = n - i
            if i == abs(d):
                S.append((-d, n + d, 0))
            elif i > d:
                r = n - (i + (i + d))
                if 0 <= r <= n:
                    S.append((i, r, i + d))
    return S
#
def multinomial(K, P):
    N = numpy.sum(K, 1)
    if K.shape == P.shape:
        return tricoeff(N, K) * numpy.prod([numpy.power(P[i], K[i]) for i in range(len(K))], 1)
    else:
        return tricoeff(N, K) * numpy.prod([numpy.power(P, K[i]) for i in range(len(K))], 1)
#
def log_fac(n):
    return numpy.sum(numpy.log(numpy.arange(2, n + 1)))
#
def tricoeff(N, S):
    # Table of log-factorials up to max(N). (The original try/except
    # NameError "cache" was local to each call and never persisted, so the
    # table is simply built here.)
    LOG_FAC = numpy.array([log_fac(i) for i in range(numpy.max(N) + 1)])
    return numpy.exp(LOG_FAC[N] - (LOG_FAC[S[:, 0]] + LOG_FAC[S[:, 1]] + LOG_FAC[S[:, 2]]))
#
def isEven(x):
    return x % 2 == 0
#
def regress(X, Y):
    """Performs simple linear regression on two vectors, X and Y, returning
    (slope, intercept, std), where std is the standard deviation of the
    residuals."""
    N = len(X)
    xbar = numpy.average(X)
    ybar = numpy.average(Y)
    xybar = numpy.average([X[i]*Y[i] for i in range(N)])
    x2bar = numpy.average([X[i]*X[i] for i in range(N)])
    B = (xybar - xbar*ybar)/(x2bar - xbar*xbar)
    A0 = ybar - B*xbar
    yfit = [A0 + B*X[i] for i in range(N)]
    yres = [Y[i] - (A0 + B*X[i]) for i in range(N)]
    var = sum([math.pow(yres[i], 2) for i in range(N)])/(N-2)
    std = math.sqrt(var)
    return (B, A0, std)
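# Example: noiseless data y = 2x + 1 recovers slope 2, intercept 1, std 0:
#   >>> B, A0, std = regress([1, 2, 3, 4, 5], [3, 5, 7, 9, 11])
#   >>> (B, A0, std)
#   (2.0, 1.0, 0.0)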
#
def boxcoxtransform(x, lambdax):
    """Box-Cox transform of a single value; the log transform is used when
    lambda is effectively zero. (Reconstructed from the orphaned return
    statement left here; this function is required by loglik below.)"""
    return math.log(x) if abs(lambdax) < 1.0e-5 else (x**lambdax - 1.0)/lambdax
#
def loglik(X, lambdax):
    """Computes the Box-Cox log-likelihood of the data X (which must be
    positive) for a given transformation parameter lambdax."""
    n = len(X)
    Xtrans = [boxcoxtransform(x, lambdax) for x in X]
    meanX = sum(Xtrans) / float(n)
    S2 = (lambdax - 1.0) * sum([math.log(x) for x in X])
    S = sum([(x - meanX)**2 for x in Xtrans])
    S1 = (-n/2.0)*math.log(S/n)
    return S2 + S1
#
def boxcoxTable(X, minlambda, maxlambda, dellambda):
    """Returns a table of (loglik, lambda) pairs for the data, evaluated on a
    grid from minlambda to maxlambda in steps of dellambda."""
    out = []
    vallambda = minlambda
    while vallambda <= maxlambda + 1.0e-5:
        llik = loglik(X, vallambda)
        out.append((llik, vallambda))
        vallambda += dellambda
    return out
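# Example: pick the lambda that maximizes the log-likelihood over a grid
# (the table stores (loglik, lambda) pairs, so max() compares loglik first):
#   >>> X = [1.5, 2.0, 3.5, 4.1, 5.8]
#   >>> best_llik, best_lambda = max(boxcoxTable(X, 0.0, 2.0, 0.1))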
#
def phi_coefficient(X, Y):
    """Calculates the phi coefficient for two boolean arrays of equal
    length."""
    N = len(X)
    assert len(X) == len(Y), "Length of arrays must be equal"
    x1y1 = sum([int(X[j]) == int(Y[j]) == 1 for j in range(N)])
    x1y0 = sum([int(X[j]) == 1 and int(Y[j]) == 0 for j in range(N)])
    x0y1 = sum([int(X[j]) == 0 and int(Y[j]) == 1 for j in range(N)])
    x0y0 = sum([int(X[j]) == int(Y[j]) == 0 for j in range(N)])
    x1 = x1y1 + x1y0
    x0 = x0y1 + x0y0
    y1 = x1y1 + x0y1
    y0 = x1y0 + x0y0
    phi_coeff = (x1y1*x0y0 - x1y0*x0y1)/math.sqrt(x1*x0*y1*y0)
    return phi_coeff
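# Example: a worked 2x2 case with counts x1y1=2, x1y0=1, x0y1=1, x0y0=2,
# giving phi = (2*2 - 1*1)/sqrt(3*3*3*3) = 1/3:
#   >>> round(phi_coefficient([1, 1, 0, 0, 1, 0], [1, 0, 0, 0, 1, 1]), 4)
#   0.3333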
#
def BH_fdr_correction(X):
    """Adjusts p-values using the Benjamini-Hochberg procedure"""
    n = len(X)
    qvalues = numpy.zeros(n)
    pvalues = numpy.array(X)
    pvalues.sort()
    pvalues = pvalues[::-1]
    for i in xrange(n):
        rank = n - i
        qvalues[i] = n/float(rank) * pvalues[i]
    # Enforce monotonicity of the q-values
    for i in xrange(0, n-1):
        if qvalues[i] < qvalues[i+1]:
            qvalues[i+1] = qvalues[i]
    p2qval = dict([(p, q) for (p, q) in zip(pvalues, qvalues)])
    return numpy.array([p2qval[p] for p in X])
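# Example: standard BH adjustment of four p-values:
#   >>> BH_fdr_correction([0.01, 0.04, 0.03, 0.20])
#   array([ 0.04      ,  0.05333333,  0.05333333,  0.2       ])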
#
def bayesian_ess_thresholds(Z_raw, ALPHA=0.05):
    """Returns Essentiality Thresholds using a BH-like procedure"""
    Z = numpy.sort(Z_raw)[::-1]
    W = 1 - Z
    N = len(Z)
    # Scan down from the largest Z until the gap between an item's posterior
    # and the running mean exceeds the BH-like bound ALPHA*i/N
    ess_threshold = 1.00
    for i in range(3, N+1):
        wi = 1 - Z[i-1]
        ai_n = (ALPHA*i)/N
        mean_wi = numpy.average(W[0:i-2])
        delta_w = wi - mean_wi
        if delta_w > ai_n:
            ess_threshold = Z[i-1]
            break
    # Scan up from the smallest Z for the non-essentiality threshold.
    # (The loop stops at i=2, where delta_w is 0 and the break always
    # triggers; the original range(0, N+1) would index Z out of bounds if it
    # ever reached i=1 or i=0.)
    noness_threshold = 0.00
    for i in range(N, 1, -1):
        ai_n = (ALPHA*i)/N
        mean_wi = numpy.average(Z[N-i+1:])
        delta_w = Z[N-i+1] - mean_wi
        if ai_n > delta_w:
            break
        noness_threshold = Z[N-i]
    return (ess_threshold, noness_threshold)
#
def tricube(X):
    """Tricube weight function: (1 - |x|^3)^3 for x in [-1, 1], and 0
    outside. Expects a numpy array."""
    result = numpy.zeros(len(X))
    ii = numpy.logical_and(X >= -1, X <= 1)
    result[ii] = numpy.power(1 - numpy.power(numpy.abs(X[ii]), 3), 3)
    return result
#
def loess(X, Y, h=10000):
    """LOESS smoothing: locally weighted linear regression of Y on X with a
    tricube kernel of bandwidth h. Expects numpy arrays."""
    smoothed = numpy.zeros(len(Y))
    for i, x in enumerate(X):
        W = tricube((X - x)/float(h))
        sW = numpy.sum(W)
        wsX = numpy.sum(W*X)
        wsY = numpy.sum(W*Y)
        wsXY = numpy.sum(W*X*Y)
        # Weighted sum of squares, as the weighted least-squares slope
        # requires (the original used the unweighted sum of squares here)
        wsXX = numpy.sum(W*X*X)
        B = (sW * wsXY - wsX * wsY)/(sW * wsXX - numpy.power(wsX, 2))
        A = (wsY - B*wsX) / sW
        smoothed[i] = B*x + A
    return smoothed
#
def loess_correction(X, Y, h=10000, window=100):
    """Corrects positional trends in the counts Y by rescaling each window
    of 'window' sites by the ratio of its LOESS-smoothed total to the global
    window mean."""
    Y = numpy.array(Y)
    size = len(X)/window + 1
    x_w = numpy.zeros(size)
    y_w = numpy.zeros(size)
    for i in range(size):
        x_w[i] = window*i
        y_w[i] = sum(Y[window*i:window*(i+1)])
    ysmooth = loess(x_w, y_w, h)
    mline = numpy.mean(y_w)
    normalized_Y = numpy.zeros(len(Y))
    for i in range(size):
        normalized_Y[window*i:window*(i+1)] = Y[window*i:window*(i+1)] * (ysmooth[i]/mline)
    return normalized_Y
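# Example (illustrative): smooth out positional trends in a read-count
# vector; the positions and counts below are random placeholders:
#   >>> pos = numpy.arange(0, 40000, 10)
#   >>> reads = numpy.random.randint(0, 100, size=len(pos))
#   >>> corrected = loess_correction(pos, reads, h=10000, window=100)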
#
def F_mean_diff_flat(*args, **kwargs):
    A = args[0]
    B = args[1]
    return numpy.mean(B) - numpy.mean(A)
#
def F_sum_diff_flat(*args, **kwargs):
    A = args[0]
    B = args[1]
    return numpy.sum(B) - numpy.sum(A)
#
def F_mean_diff_dict(*args, **kwargs):
    D = args[0]
    data1_total = 0; data2_total = 0
    data1_size = 0; data2_size = 0
    for L in D:
        data1_total += numpy.sum(D[L][0])
        data1_size += len(D[L][0])
        data2_total += numpy.sum(D[L][1])
        data2_size += len(D[L][1])
    return (data2_total/float(data2_size)) - (data1_total/float(data1_size))
#
def F_sum_diff_dict(*args, **kwargs):
    D = args[0]
    data1_total = 0; data2_total = 0
    for L in D:
        data1_total += numpy.sum(D[L][0])
        data2_total += numpy.sum(D[L][1])
    return data2_total - data1_total
#
def F_shuffle_flat(*args, **kwargs):
    X = args[0]
    return numpy.random.permutation(X)
#
def F_shuffle_dict_libraries(*args, **kwargs):
    # Permute the control/experimental observations within each library,
    # preserving the per-library sample sizes
    D = args[0]
    E = {}
    for L in D:
        n1 = len(D[L][0])
        combined = numpy.append(D[L][0], D[L][1])
        perm = numpy.random.permutation(combined)
        E[L] = numpy.array([perm[:n1], perm[n1:]])
    return E
#
def resampling(data1, data2, S=10000, testFunc=F_mean_diff_flat,
               permFunc=F_shuffle_flat, adaptive=False, lib_str1="", lib_str2=""):
    """Performs a resampling / permutation test on two sets of data, given a
    function defining the test statistic and a function defining how to
    permute the data.
    Args:
        data1: List or numpy array with the first set of observations.
        data2: List or numpy array with the second set of observations.
        S: Number of permutation tests (or samples) to obtain.
        testFunc: Function defining the desired test statistic. Should accept
                two lists as arguments. Default is difference in means between
                the observations.
        permFunc: Function defining the way to permute the data. Should accept
                one argument, the combined set of data. Default is random
                shuffle.
        adaptive: Cuts off resampling early depending on significance.
        lib_str1: Library string for the first set of observations.
        lib_str2: Library string for the second set of observations.
    Returns:
        Tuple with the described values:
            - test_obs -- Test statistic of the observation.
            - mean1 -- Arithmetic mean of the first set of data.
            - mean2 -- Arithmetic mean of the second set of data.
            - log2FC -- Normalized log2FC of the means.
            - pval_ltail -- Lower-tail p-value.
            - pval_utail -- Upper-tail p-value.
            - pval_2tail -- Two-tailed p-value.
            - test_sample -- List of samples of the test statistic.
    :Example:
        >>> import pytransit.stat_tools as stat_tools
        >>> import numpy
        >>> X = numpy.random.random(100)
        >>> Y = numpy.random.random(100)
        >>> (test_obs, mean1, mean2, log2fc, pval_ltail, pval_utail, pval_2tail, test_sample) = stat_tools.resampling(X,Y)
        >>> pval_2tail
        0.2167
        >>> test_sample[:3]
        [0.076213992904990535, -0.0052513291091412784, -0.0038425140184765172]
    """
    # Do basic sanity checks:
    # - Check that the library strings use the same set of letters
    lib_diff = set(lib_str1) ^ set(lib_str2)
    if lib_diff:
        raise ValueError("At least one library string has a letter not used by the other: %s" % ", ".join(lib_diff))
    # - Check input has some data
    assert len(data1) > 0, "Data1 cannot be empty"
    assert len(data2) > 0, "Data2 cannot be empty"
    # Calculate basic statistics for the input data:
    n1 = len(data1)
    n2 = len(data2)
    mean1 = 0
    if n1 > 0:
        mean1 = numpy.mean(data1)
    mean2 = 0
    if n2 > 0:
        mean2 = numpy.mean(data2)
    try:
        # Only adjust the log2FC (with pseudocounts) if one of the means is zero
        if mean1 > 0 and mean2 > 0:
            log2FC = math.log((mean2)/(mean1), 2)
        else:
            log2FC = math.log((mean2+1.0)/(mean1+1.0), 2)
    except:
        log2FC = 0.0
    # Get stats and info based on whether working with libraries or not:
    nTAs = 0
    if lib_str1:
        # Get the number of TA sites implied by the data and library string
        nTAs = len(data1.flatten())/len(lib_str1)
        assert len(data2.flatten())/len(lib_str2) == nTAs, "Datasets do not have matching sites; check input data and library strings."
        # Group the data by library
        perm = get_lib_data_dict(data1, lib_str1, data2, lib_str2, nTAs)
        test_obs = testFunc(perm)
    else:
        try:
            test_obs = testFunc(data1, data2)
        except Exception as e:
            print ""
            print "!"*100
            print "Error: Could not apply test function to input data!"
            print "data1", data1
            print "data2", data2
            print ""
            print "\t%s" % e
            print "!"*100
            print ""
            return None
        perm = numpy.zeros(n1+n2)
        perm[:n1] = data1
        perm[n1:] = data2
    count_ltail = 0
    count_utail = 0
    count_2tail = 0
    test_list = []
    s_performed = 0
    for s in range(S):
        if len(perm) > 0:
            perm = permFunc(perm)
            if not lib_str1:
                test_sample = testFunc(perm[:n1], perm[n1:])
            else:
                test_sample = testFunc(perm)
        else:
            test_sample = 0
        test_list.append(test_sample)
        if test_sample <= test_obs: count_ltail += 1
        if test_sample >= test_obs: count_utail += 1
        if abs(test_sample) >= abs(test_obs): count_2tail += 1
        s_performed += 1
        if adaptive:
            # Check at 1%, 10%, and 100% of the samples; stop early if the
            # two-tailed count is already too large for significance
            if s_performed == round(S*0.01) or s_performed == round(S*0.1) or s_performed == round(S*1):
                if count_2tail >= round(S*0.01*0.10):
                    break
    pval_ltail = count_ltail/float(s_performed)
    pval_utail = count_utail/float(s_performed)
    pval_2tail = count_2tail/float(s_performed)
    return (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, test_list)
#
def cumulative_average(new_x, n, prev_avg):
    """Updates a running mean with the observation new_x, given that n
    observations produced prev_avg. Returns (new_avg, n+1)."""
    return ((new_x + (n*prev_avg))/(n+1.0), n+1)
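# Example: the mean of two observations is 5.0; adding a third observation
# of 10 updates it to 20/3:
#   >>> avg, n = cumulative_average(10, 2, 5.0)
#   >>> round(avg, 4), n
#   (6.6667, 3)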
#
def text_histogram(X, nBins=20, resolution=200, obs=None):
    """Prints a text histogram of X with nBins bins, marking the bin
    containing 'obs' (if given) with an arrow."""
    MIN = numpy.min(X)
    MAX = numpy.max(X)
    bin_list = numpy.linspace(MIN, MAX, nBins)
    hit_flag = "->"; empty_flag = "  "
    # Pair consecutive bin edges ([:-1] rather than the original [:-2],
    # which skipped the next-to-last bin)
    for b_l, b_u in zip(bin_list[:-1], bin_list[1:]):
        Z = numpy.logical_and(b_l <= X, X < b_u)
        density = numpy.mean(Z)
        if obs is not None and (b_l <= obs < b_u):
            flag = hit_flag
        else:
            flag = empty_flag
        print "%-12f\t%s|%s" % (b_l, flag, "#"*int(resolution*density))
    # Final, open-ended bin
    Z = numpy.logical_and(bin_list[-1] <= X, X < float("inf"))
    density = numpy.mean(Z)
    if obs is not None and (bin_list[-1] <= obs < float("inf")):
        flag = hit_flag
    else:
        flag = empty_flag
    print "%-12f\t%s|%s" % (bin_list[-1], flag, "#"*int(resolution*density))
def parse_lib_index(nData, libstr, nTAs):
    """Maps each library letter in libstr to the numpy index array of its
    nTAs sites within a flattened dataset of length nData."""
    full_index = numpy.arange(nData)
    lib_to_index = {}
    for (k, L) in enumerate(libstr):
        if L not in lib_to_index: lib_to_index[L] = []
        lib_to_index[L] += list(full_index[k*nTAs:((k+1)*nTAs)])
    for L, index in lib_to_index.items():
        lib_to_index[L] = numpy.array(index)
    return lib_to_index
#
def combine_lib_dicts(L1, L2):
    KEYS = L1.keys()
    DATA = {}
    for K in KEYS:
        DATA[K] = numpy.array([L1[K], L2[K]])
    return DATA
#
def get_lib_data_dict(data1, ctrl_lib_str, data2, exp_lib_str, nTAs):
    lib1_index_dict = parse_lib_index(len(data1), ctrl_lib_str, nTAs)
    lib2_index_dict = parse_lib_index(len(data2), exp_lib_str, nTAs)
    lib1_data_dict = dict([(L, data1[lib1_index_dict[L]]) for L in sorted(lib1_index_dict)])
    lib2_data_dict = dict([(L, data2[lib2_index_dict[L]]) for L in sorted(lib2_index_dict)])
    data_dict = combine_lib_dicts(lib1_data_dict, lib2_data_dict)
    return data_dict
#TEST-CASES
if __name__ == "__main__":
    """
    n = 20
    p = 0.5
    k = 14
    print ""
    print "#########################################"
    print "############ BINOM TEST #################"
    print "#########################################"
    print "Coin Tosses: %d" % n
    print "Success Prob: %3.2f" % p
    print "Observed: %d" % k
    print ""
    print "Left-Tail Test:"
    print "%d tosses, p-value = %f" % (k, binom_test(k,n,p,"less"))
    print ""
    print "Right-Tail Test:"
    print "%d tosses, p-value = %f" % (k, binom_test(k,n,p,"greater"))
    print ""
    print "Two-Sided Test:"
    print "%d tosses, p-value = %f" % (k, binom_test(k,n,p,"two-sided"))
    print ""
    print ""
    print "#########################################"
    print "############ RESAMPLING #################"
    print "#########################################"
    data1 = scipy.stats.norm.rvs(100,10, size=1000)
    data2 = scipy.stats.norm.rvs(105,10, size=1000)
    (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, test_list) = resampling(data1, data2, S=10000)
    print "Data1:"
    text_histogram(data1, nBins = 20)
    print ""
    print "Data2:"
    text_histogram(data2, nBins = 20)
    print ""
    print "Results:", (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail)
    print ""
    print "Resampling Histogram:"
    text_histogram(test_list, nBins = 20, obs=test_obs)
    """
    ## TEST
    import pytransit.transit_tools as transit_tools
    import pytransit.tnseq_tools as tnseq_tools
    import pytransit.norm_tools as norm_tools
    import sys
    ctrldata = ["/pacific/home/mdejesus/transit/tests/GI/H37Rv_day0_rep1.wig",
                "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day0_rep1.wig",
                "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day0_rep2.wig",
                "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day0_rep2.wig"]
    expdata = ["/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep1.wig",
               "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep2.wig",
               "/pacific/home/mdejesus/transit/tests/GI/H37Rv_day32_rep3.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep1.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep2.wig",
               "/pacific/home/mdejesus/transit/tests/GI/Rv2680_day32_rep3.wig"]
    annotation = "/pacific/home/mdejesus/transit/tests/GI/H37Rv.prot_table"
    i = 202
    if len(sys.argv) > 1:
        i = int(sys.argv[1])
    DO_LIB = True
    if len(sys.argv) > 2:
        DO_LIB = bool(int(sys.argv[2]))
    if DO_LIB:
        ctrl_lib_str = "ABAB"
        exp_lib_str = "AAABBB"
    else:
        ctrl_lib_str = ""
        exp_lib_str = ""
    Kctrl = len(ctrldata)
    Kexp = len(expdata)
    (data, position) = transit_tools.get_validated_data(ctrldata+expdata)
    (K, N) = data.shape
    (data, factors) = norm_tools.normalize_data(data, "TTR", ctrldata+expdata, annotation)
    G = tnseq_tools.Genes(ctrldata + expdata, annotation, data=data, position=position)
    gene = G[i]
    print "\n\n"
    print "#"*100
    print "# (%s) NEW TEST: %s" % (DO_LIB, gene)
    print "#"*100
    print ""
    ii = numpy.ones(gene.n) == 1
    data1 = gene.reads[:Kctrl, ii].flatten()
    data2 = gene.reads[Kctrl:, ii].flatten()
    data_dict = get_lib_data_dict(data1, ctrl_lib_str, data2, exp_lib_str, gene.n)
    if DO_LIB:
        (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, testlist) = resampling(data1, data2, S=10000, testFunc=F_mean_diff_dict, permFunc=F_shuffle_dict_libraries, adaptive=False, lib_str1=ctrl_lib_str, lib_str2=exp_lib_str)
    else:
        (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, testlist) = resampling(data1, data2, S=10000, testFunc=F_mean_diff_flat, permFunc=F_shuffle_flat, adaptive=False, lib_str1=ctrl_lib_str, lib_str2=exp_lib_str)
    print "Resampling Histogram:"
    text_histogram(testlist, nBins = 20, obs=test_obs)