Skip to content

Table 10: Performance comparison across augmentations for different flowpic sizes.

import pathlib

import numpy as np
import pandas as pd
from scipy.stats import tukey_hsd
# Directory containing the per-resolution run summaries read below.
folder = pathlib.Path(
    "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout"
)
# Read one parquet file per flowpic resolution (same order as listed) and
# stack the resulting frames row-wise into a single table.
df = pd.concat(
    [
        pd.read_parquet(folder / parquet_name)
        for parquet_name in (
            "runsinfo_flowpic_dim_1500.parquet",
            "runsinfo_flowpic_dim_64.parquet",
            "runsinfo_flowpic_dim_32.parquet",
        )
    ]
)
# Alternative input, left disabled:
# df = pd.read_parquet('campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/merged_runsinfo.parquet')
# Keep only the rows whose test split is "test-script".
df_script = df[df["test_split_name"].eq("test-script")]

# Accuracy values of the "test-script" rows, one plain list per flowpic size.
acc_32 = df_script.loc[df_script["flowpic_dim"] == 32, "acc"].values.tolist()
acc_64 = df_script.loc[df_script["flowpic_dim"] == 64, "acc"].values.tolist()
acc_1500 = df_script.loc[df_script["flowpic_dim"] == 1500, "acc"].values.tolist()

# Pairwise comparison of the three groups; res.pvalue[i, j] is read below for
# groups i and j in the order they are passed here (32, 64, 1500).
res = tukey_hsd(acc_32, acc_64, acc_1500)

# One row per unordered pair of resolutions, indexed by the two size labels.
df = pd.DataFrame(
    {"pvalue": [res.pvalue[0, 1], res.pvalue[0, 2], res.pvalue[1, 2]]},
    index=pd.MultiIndex.from_tuples(
        [
            ("32x32", "64x64"),
            ("32x32", "1500x1500"),
            ("64x64", "1500x1500"),
        ]
    ),
)
# Flag the pairs whose p-value falls below the 0.05 threshold.
df["is_different"] = df["pvalue"] < 0.05
df
                       pvalue  is_different
32x32 64x64      5.772842e-01         False
      1500x1500  1.936038e-06          True
64x64 1500x1500  1.044272e-08          True