An attempt to estimate Ne and sampling time from the correlation in segregting sites¶

# Testing the performance of Monte-Carlo Estimators 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
import sys
sys.path.append('../../src/')
from plot_utils import *

%matplotlib inline

plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams['figure.facecolor'] = "w"
plt.rcParams['figure.autolayout'] = True
plt.rcParams['pdf.fonttype'] = 3
from mpl_toolkits.axes_grid1 import make_axes_locatable



# Making the relevant figure directories that we want
supp_figdir = '../../plots/supp_figs/two_locus_stats/corrSASB_sims/'
os.makedirs(supp_figdir, exist_ok=True)

# Data input and output
fname = '../../results/corr_seg_sites/est_ta_Ne_corr_sa_sb.csv'
df = pd.read_csv(fname)
df.head()

fig, ax = plt.subplots(2,2, figsize=(5,4), sharex=True, sharey='row')

constant_df = df[df['scenario'] == 'SerialConstant']
ax[0,0].errorbar(constant_df.ta.values, constant_df.ta_est.values,
                 yerr=2*constant_df.se_ta_est, capsize=2, lw=2, linestyle='none', color='blue', zorder=5)

ax[0,0].scatter(constant_df.ta.values, constant_df.ta.values, color='red', s=20, marker='+', zorder=1)

# Plot estimates of Ne
ax[1,0].errorbar(constant_df.ta.values, constant_df.Ne_est.values,
                 yerr=2*constant_df.se_Ne_est, capsize=2, lw=2, linestyle='none', color='blue')

growth_df = df[df['scenario'] == 'TennessenEuropean']
ax[0,1].errorbar(growth_df.ta.values, growth_df.ta_est.values,
                 yerr=2*growth_df.se_ta_est, capsize=2, lw=2, linestyle='none', color='blue', zorder=5)
ax[0,1].scatter(growth_df.ta.values, growth_df.ta.values, color='red', marker='+', zorder=1)

# Plot estimates of Ne
ax[1,1].errorbar(growth_df.ta.values, growth_df.Ne_est.values,
                 yerr=2*growth_df.se_Ne_est, capsize=2, lw=2, linestyle='none', color='blue')

ax[1,0].axhline(1e4, linestyle='--', color='orange')
ax[1,1].axhline(6958., linestyle='--', color='orange')
ax[0,0].set_xlim(-1,1e5)

ax[0,0].set_title(r'Constant $N_e$', fontsize=12)
ax[0,1].set_title(r'Tennessen et al. 2012', fontsize=12)
ax[0,0].set_ylabel(r'$\hat{t}_a$', fontsize=14)
ax[1,0].set_ylabel(r'$\hat{N}_e$', fontsize=14)

ax[0,0].set_xscale('symlog')


label_multipanel(ax[0,:], ['A','B'], fontsize=14, fontweight='bold', va='top', ha='right')


debox(ax[0,0]); debox(ax[0,1]);
debox(ax[1,0]); debox(ax[1,1]);

fig.text(0.55, -0.01, r'$t_a$ (generations)', fontsize=14, va='center', ha='center')
plt.tight_layout()
plt.savefig(supp_figdir + 'lstsq_constant_growth.pdf', bbox_inches='tight', dpi=300)

	scenario	ta	ta_est	se_ta_est	Ne_est	se_Ne_est
0	SerialConstant	0	1.366381e+03	2.093731e+01	8390.940526	22.308661
1	TennessenEuropean	0	3.449107e-41	3.425773e-41	6699.944048	8.384043
2	SerialConstant	100	3.760312e+02	1.470392e+01	8342.151167	21.402484
3	TennessenEuropean	100	1.916471e-20	4.049841e-21	6860.790907	7.295584
4	SerialConstant	1000	1.816264e+03	1.968878e+01	8367.027379	22.217670