An attempt to estimate Ne and sampling time from the correlation in segregting sites

In [1]:
# Testing the performance of Monte-Carlo Estimators 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
import sys
sys.path.append('../../src/')
from plot_utils import *

%matplotlib inline
In [2]:
plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.autolayout'] = True
plt.rcParams['pdf.fonttype'] = 3
from mpl_toolkits.axes_grid1 import make_axes_locatable



# Making the relevant figure directories that we want
supp_figdir = '../../plots/supp_figs/two_locus_stats/corrSASB_sims/'
os.makedirs(supp_figdir, exist_ok=True)
In [3]:
# Data input and output
fname = '../../results/corr_seg_sites/est_ta_Ne_corr_sa_sb.csv'
df = pd.read_csv(fname)
df.head()
Out[3]:
scenario ta ta_est se_ta_est Ne_est se_Ne_est
0 SerialConstant 0 1.366381e+03 2.093731e+01 8390.940526 22.308661
1 TennessenEuropean 0 3.449107e-41 3.425773e-41 6699.944048 8.384043
2 SerialConstant 100 3.760312e+02 1.470392e+01 8342.151167 21.402484
3 TennessenEuropean 100 1.916471e-20 4.049841e-21 6860.790907 7.295584
4 SerialConstant 1000 1.816264e+03 1.968878e+01 8367.027379 22.217670
In [4]:
fig, ax = plt.subplots(2,2, figsize=(5,4), sharex=True, sharey='row')

constant_df = df[df['scenario'] == 'SerialConstant']
ax[0,0].errorbar(constant_df.ta.values, constant_df.ta_est.values,
                 yerr=2*constant_df.se_ta_est, capsize=2, lw=2, linestyle='none', color='blue', zorder=5)

ax[0,0].scatter(constant_df.ta.values, constant_df.ta.values, color='red', s=20, marker='+', zorder=1)

# Plot estimates of Ne
ax[1,0].errorbar(constant_df.ta.values, constant_df.Ne_est.values,
                 yerr=2*constant_df.se_Ne_est, capsize=2, lw=2, linestyle='none', color='blue')

growth_df = df[df['scenario'] == 'TennessenEuropean']
ax[0,1].errorbar(growth_df.ta.values, growth_df.ta_est.values,
                 yerr=2*growth_df.se_ta_est, capsize=2, lw=2, linestyle='none', color='blue', zorder=5)
ax[0,1].scatter(growth_df.ta.values, growth_df.ta.values, color='red', marker='+', zorder=1)

# Plot estimates of Ne
ax[1,1].errorbar(growth_df.ta.values, growth_df.Ne_est.values,
                 yerr=2*growth_df.se_Ne_est, capsize=2, lw=2, linestyle='none', color='blue')

ax[1,0].axhline(1e4, linestyle='--', color='orange')
ax[1,1].axhline(6958., linestyle='--', color='orange')
ax[0,0].set_xlim(-1,1e5)

ax[0,0].set_title(r'Constant $N_e$', fontsize=12)
ax[0,1].set_title(r'Tennessen et al. 2012', fontsize=12)
ax[0,0].set_ylabel(r'$\hat{t}_a$', fontsize=14)
ax[1,0].set_ylabel(r'$\hat{N}_e$', fontsize=14)

ax[0,0].set_xscale('symlog')


label_multipanel(ax[0,:], ['A','B'], fontsize=14, fontweight='bold', va='top', ha='right')


debox(ax[0,0]); debox(ax[0,1]);
debox(ax[1,0]); debox(ax[1,1]);

fig.text(0.55, -0.01, r'$t_a$ (generations)', fontsize=14, va='center', ha='center')
plt.tight_layout()
plt.savefig(supp_figdir + 'lstsq_constant_growth.pdf', bbox_inches='tight', dpi=300)