import pathlib
import numpy as np
import pandas as pd
from scipy.stats import tukey_hsd
# Directory holding the per-resolution run summaries of the campaign.
folder = pathlib.Path(
    "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout"
)

# Read one parquet file per flowpic resolution and stack them row-wise into a
# single frame (original row indices are kept, so they may repeat).
df = pd.concat(
    [
        pd.read_parquet(folder.joinpath(filename))
        for filename in (
            "runsinfo_flowpic_dim_1500.parquet",
            "runsinfo_flowpic_dim_64.parquet",
            "runsinfo_flowpic_dim_32.parquet",
        )
    ]
)
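# Alternative data source (disabled): load one merged runs-info file instead of
# the three per-resolution files above.
# df = pd.read_parquet('campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/merged_runsinfo.parquet')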
# Keep only the runs evaluated on the "test-script" split.
df_script = df.loc[df["test_split_name"] == "test-script"]

# Collect the "acc" values of each flowpic resolution as plain Python lists.
acc_32, acc_64, acc_1500 = (
    df_script.loc[df_script["flowpic_dim"] == dim, "acc"].values.tolist()
    for dim in (32, 64, 1500)
)

# Tukey's HSD pairwise comparison across the three resolutions.
# Group order matters downstream: 0 -> 32, 1 -> 64, 2 -> 1500.
res = tukey_hsd(acc_32, acc_64, acc_1500)
# Summarise the pairwise p-values in a table indexed by the compared pair.
# res.pvalue[i, j] refers to groups i and j in the order they were passed to
# tukey_hsd (0 -> 32x32, 1 -> 64x64, 2 -> 1500x1500).
# NOTE: this rebinds `df`, replacing the loaded runs-info frame.
df = pd.DataFrame(
    {"pvalue": [res.pvalue[0, 1], res.pvalue[0, 2], res.pvalue[1, 2]]},
    index=pd.MultiIndex.from_tuples(
        [
            ("32x32", "64x64"),
            ("32x32", "1500x1500"),
            ("64x64", "1500x1500"),
        ]
    ),
)

# Flag the pairs whose p-value falls below the 0.05 threshold.
df["is_different"] = df["pvalue"] < 0.05