import numpy as np
from sympy import *
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from scipy.stats import binned_statistic, describe
import sys
sys.path.append('../../src/')
from aDNA_coal_sim import *
from plot_utils import *
%matplotlib inline
plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.autolayout'] = True
plt.rcParams['pdf.fonttype'] = 3
from mpl_toolkits.axes_grid1 import make_axes_locatable
import os
main_figdir = '../../plots/jointLD_stats/'
supp_figdir = '../../plots/supp_figs/jointLD_stats/'
os.makedirs(main_figdir, exist_ok=True)
os.makedirs(supp_figdir, exist_ok=True)
# Define variables in sympy...
rho,t,eta,gamma = symbols('rho t \eta \gamma')
gamma = rho*(1 - exp(-t *(rho/2 + 1)))/(rho + 2)
eta = 2*(1 - exp(-t *(rho/2 + 1)))/(rho + 2)
eT_AT_B_200 = (rho**2 + 14*rho + 36)/(rho**2 + 13*rho + 18)
eT_AT_B_111 = (rho**2 + 13*rho + 24)/(rho**2 + 13*rho + 18)
eT_AT_B_022 = (rho**2 + 13*rho + 22)/(rho**2 + 13*rho + 18)
# Defining the staggered statistics (from the note)
e_TATB_200_anc = (1 - gamma)*eT_AT_B_200 + gamma*eT_AT_B_111
e_TATB_022_anc = (1 - eta).simplify() * eT_AT_B_022 + eta*eT_AT_B_111
e_TATB_111_mod_coupled = (1-gamma)*eT_AT_B_111 + gamma*eT_AT_B_022
e_TATB_111_mod_uncoupled = (1-eta)*eT_AT_B_111 + eta*eT_AT_B_200
# Simplifying the statistics
e_TATB_200_anc = e_TATB_200_anc.factor().simplify()
e_TATB_022_anc = e_TATB_022_anc.factor().simplify()
e_TATB_111_mod_coupled = e_TATB_111_mod_coupled.factor().simplify()
e_TATB_111_mod_uncoupled = e_TATB_111_mod_uncoupled.factor().simplify()
e_TATB_022_anc
eD0DT_numerator = (e_TATB_200_anc - e_TATB_111_mod_coupled - e_TATB_111_mod_uncoupled + e_TATB_022_anc)
e_D0DT_numerator = eD0DT_numerator.factor().simplify()
e_D0DT_numerator
e_D0Dt_norm = e_D0DT_numerator / e_TATB_022_anc
e_D0Dt_norm # Note this does not actually go "negative"
def eD0Dt_norm_eval(r,ta):
"""Function just to evaluate the expression..."""
return(e_D0Dt_norm.evalf(subs={rho: r, t:ta}))
fig, ax = plt.subplots(1,1,figsize=(4,3))
rhos = np.logspace(-3, 3, 50)
tas = [0, 1e-2, 1e-1]
for ta in tqdm(tas):
cur_d0dt_norm = np.array([eD0Dt_norm_eval(r,ta) for r in rhos])
ax.plot(rhos, cur_d0dt_norm, lw=2, label=r'$t_a = %0.2f$' % ta)
ax.legend(fontsize=10)
ax.set_xscale('log')
ax.set_xlabel(r'$\rho$', fontsize=14)
ax.set_ylabel(r'$\frac{\mathbb{E}[D^{(0)}D^{(t)}]}{\mathbb{E}[p^{(0)}_A(1 - p^{(t)}_A)p_B^{(0)}(1 - p_B^{(t)}]}$', fontsize=14)
debox(ax);
plt.savefig(main_figdir + 'eD0Dt_theory.pdf', dpi=300, bbox_inches='tight')
ld_raw_test = pd.read_csv('../../results/ld_stats_raw/ld_stats_time_sep_raw.csv.gz')
ld_raw_test['ed0dt'] = ld_raw_test['d0dt']/ld_raw_test['pApB']
ld_raw_test.head()
def eD0Dt_norm_test(r,t):
""" Revamped version of that function ... """
return(((r+2)*(r+10))/((r**3 + 15*r**2 + 48*r + 48)*np.exp(t*(r+2)/2) - 4.0))
fig, ax = plt.subplots(1,1,figsize=(4,4))
for x in tqdm([0,100, 1000]):
ld_raw_df_filt = ld_raw_test[(ld_raw_test['ta'] == x) & (ld_raw_test.maf == 0.00)]
ax.scatter(ld_raw_df_filt.rec_dist*1e4, ld_raw_df_filt.ed0dt, s=5)
z = x/(1e4)
# Dealing with some rescaling between sims vs. theory (factors of two here ... )
cur_d0dt_norm = np.array([eD0Dt_norm_test(y,z/2) for y in np.unique(ld_raw_df_filt.rec_dist*1e4*4)])
ax.plot(np.unique(ld_raw_df_filt.rec_dist*1e4), cur_d0dt_norm, lw=2, label=r'$t_a = %d$' % int(z*1e4))
ax.legend()
ax.set_xscale('log')
ax.set_xlabel(r'$\rho$', fontsize=14)
ax.set_ylabel(r'$\sigma^2_t$', fontsize=14)
debox(ax);
ax.set_xlim(0.9, 1.5e2)
ax.set_ylim(-0.01,0.175)
plt.savefig(main_figdir + 'eD0Dt_theory_sims.pdf', dpi=300, bbox_inches='tight')