Skip to content

Table 4: Comparing data augmentation functions applied in supervised training.

import numpy as np
import pandas as pd
import statsmodels.stats.api as sms
import itertools
import pathlib
import tempfile
def compute_ci95(ser):
    """Return the upper margin of the 95% confidence interval of the mean.

    Parameters
    ----------
    ser : pandas.Series
        Numeric samples; read through ``ser.values`` and ``ser.mean()``.

    Returns
    -------
    float
        Distance between the upper bound given by
        ``DescrStatsW.tconfint_mean(alpha=0.05)`` and the sample mean.
    """
    # Only the upper bound is needed; the lower bound is discarded.
    _, upper = sms.DescrStatsW(ser.values).tconfint_mean(alpha=0.05)
    return upper - ser.mean()
# Folder containing the per-resolution parquet summaries of the campaign.
folder_campaign_summary = pathlib.Path(
    "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout/"
)

# Load the run summaries (one parquet file per flowpic resolution) and
# stack them into a single frame.
runsinfo_files = (
    "runsinfo_flowpic_dim_32.parquet",
    "runsinfo_flowpic_dim_64.parquet",
    "runsinfo_flowpic_dim_1500.parquet",
)
df = pd.concat(
    [pd.read_parquet(folder_campaign_summary / name) for name in runsinfo_files]
)
# flowpic_dim -> accuracy statistics per (test_split_name, aug_name) group.
df_agg_dict = {}
for flowpic_dim in (32, 64, 1500):
    is_current_dim = df["flowpic_dim"] == flowpic_dim
    df_tmp = df[is_current_dim]

    per_group = df_tmp.groupby(["test_split_name", "aug_name"])
    df_agg = per_group.agg({"acc": ["count", "mean", "std", compute_ci95]})

    # Drop the outer "acc" column level and give the custom aggregation a
    # short column name.
    df_agg.columns = df_agg.columns.droplevel(0)
    df_agg = df_agg.rename(columns={"compute_ci95": "ci95"})

    # NOTE(review): `fname` is built but never used in the visible code --
    # presumably a leftover from writing the summary to CSV; confirm.
    fname = folder_campaign_summary / f"summary_flowpic_dim_{flowpic_dim}.csv"

    df_agg_dict[flowpic_dim] = df_agg
# loading imc22-paper results
# (these are obviously copied)

# Accuracy values reported in the imc22 paper for the "script" test split,
# one row per augmentation and one column per flowpic resolution.
IMC22_TABLE_TEST_SCRIPT = """
aug_name,32,64,1500
No augmentation,98.67,99.1,96.22
Rotate,98.6,98.87,94.89
Horizontal flip,98.93,99.27,97.33
Color jitter,96.73,96.4,94.0
Packet loss,98.73,99.6,96.22
Time shift,99.13,99.53,97.56
Change rtt,99.4,100.0,98.44
"""

# Same layout as above, for the "human" test split.
IMC22_TABLE_TEST_HUMAN = """
aug_name,32,64,1500
No augmentation,92.4,85.6,73.3
Rotate,93.73,87.07,77.3
Horizontal flip,94.67,79.33,87.9
Color jitter,82.93,74.93,68.0
Packet loss,90.93,85.6,84.0
Time shift,92.8,87.33,77.3
Change rtt,96.4,88.6,90.7
"""


def _load_imc22_table(csv_text):
    """Parse one of the hard-coded imc22 CSV tables into a DataFrame.

    Parameters
    ----------
    csv_text : str
        CSV content whose header is ``aug_name`` followed by one column per
        flowpic resolution.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``aug_name``; columns are a 3-level MultiIndex
        ``("imc22-paper", <resolution as read from the header>, "mean")``.
    """
    import io  # local import so the cell does not depend on other cells

    # Parse straight from memory: no temporary file is needed, and reopening
    # a still-open NamedTemporaryFile by name is not portable (e.g. Windows).
    table = pd.read_csv(io.StringIO(csv_text)).set_index("aug_name")
    table.columns = pd.MultiIndex.from_product(
        [["imc22-paper"], table.columns, ["mean"]]
    )
    return table


df_imc22_table_test_script = _load_imc22_table(IMC22_TABLE_TEST_SCRIPT)
df_imc22_table_test_human = _load_imc22_table(IMC22_TABLE_TEST_HUMAN)
# Short labels for the test split identifiers.
_SPLIT_LABELS = {
    "test-human": "human",
    "test-script": "script",
    "test-train-val-leftover": "leftover",
}

# Human-readable labels for the augmentation identifiers.
_AUGMENTATION_LABELS = {
    "noaug": "No augmentation",
    "changertt": "Change rtt",
    "colorjitter": "Color jitter",
    "horizontalflip": "Horizontal flip",
    "packetloss": "Packet loss",
    "rotate": "Rotate",
    "timeshift": "Time shift",
}

# Single lookup table holding both kinds of renaming.
RENAMING = {**_SPLIT_LABELS, **_AUGMENTATION_LABELS}

# Order in which the augmentation labels are listed.
AUG_NAME_ORDER = [
    "No augmentation",
    "Rotate",
    "Horizontal flip",
    "Color jitter",
    "Packet loss",
    "Time shift",
    "Change rtt",
]

# split label -> {flowpic_dim -> frame of rounded mean/ci95 per augmentation}
partial_dfs = {split: {} for split in ("human", "script", "leftover")}

for flowpic_dim in (32, 64, 1500):
    # Keep only the reported statistics, rounded to two decimals, and turn
    # the group keys back into regular columns.
    summary = df_agg_dict[flowpic_dim][["mean", "ci95"]].round(2).reset_index()

    # Swap the raw identifiers for the display labels.
    summary["test_split_name"] = summary["test_split_name"].replace(RENAMING)
    summary["aug_name"] = summary["aug_name"].replace(RENAMING)
    summary = summary.set_index("test_split_name")

    for split_name in ("script", "human", "leftover"):
        per_split = summary.loc[split_name].set_index("aug_name")
        # Reorder the rows to follow AUG_NAME_ORDER.
        partial_dfs[split_name][flowpic_dim] = per_split.loc[AUG_NAME_ORDER]
def _with_ours_level(per_dim_tables):
    """Join the per-resolution tables side by side under an "ours" level.

    Parameters
    ----------
    per_dim_tables : dict
        Maps a flowpic resolution to the DataFrame of statistics for it.

    Returns
    -------
    pandas.DataFrame
        Columns are a 3-level MultiIndex
        ``("ours", <resolution>, <statistic>)``.
    """
    side_by_side = pd.concat(per_dim_tables, axis=1)
    # Prepend the outer level through concat keys so that every column keeps
    # its own label. Rebuilding the labels as a product of
    # `columns.levels` is only right when the level storage order happens to
    # match the column order, and would silently mislabel columns otherwise.
    return pd.concat({"ours": side_by_side}, axis=1)


df_ours_script = _with_ours_level(partial_dfs["script"])

df_ours_human = _with_ours_level(partial_dfs["human"])

df_ours_leftover = _with_ours_level(partial_dfs["leftover"])
print("=== test on script ===")
# Paper values and our values side by side, aligned on the augmentation name.
df_tmp = pd.concat((df_imc22_table_test_script, df_ours_script), axis=1)

# Extra bottom row: for each resolution, the average of (ours - paper) over
# all augmentations, stored under our "mean" column; other cells stay NaN.
df_tmp.loc["mean_diff", :] = np.nan
for dim in (32, 64, 1500):
    # The paper columns are keyed by the resolution as a string.
    gap = df_tmp[("ours", dim, "mean")] - df_tmp[("imc22-paper", str(dim), "mean")]
    df_tmp.loc["mean_diff", ("ours", dim, "mean")] = gap.mean().round(2)

# Blank out the NaN cells for presentation and export.
rendered = df_tmp.fillna("")
display(rendered)
rendered.to_csv(
    "table3_ucdavis-icdm19_comparing_data_augmentations_functions_test_on_script.csv"
)
=== test on script ===
imc22-paper ours
32 64 1500 32 64 1500
mean mean mean mean ci95 mean ci95 mean ci95
aug_name
No augmentation 98.67 99.1 96.22 95.64 0.37 95.87 0.29 94.93 0.72
Rotate 98.6 98.87 94.89 96.31 0.44 96.93 0.46 95.69 0.39
Horizontal flip 98.93 99.27 97.33 95.47 0.45 96.00 0.59 94.89 0.79
Color jitter 96.73 96.4 94.0 97.56 0.55 97.16 0.62 94.93 0.68
Packet loss 98.73 99.6 96.22 96.89 0.52 96.84 0.63 95.96 0.51
Time shift 99.13 99.53 97.56 96.71 0.6 97.16 0.49 96.89 0.27
Change rtt 99.4 100.0 98.44 97.29 0.35 97.02 0.46 96.93 0.31
mean_diff -2.05 -2.26 -0.63
print("=== test on human ===")
# Paper values and our values side by side, aligned on the augmentation name.
df_tmp = pd.concat((df_imc22_table_test_human, df_ours_human), axis=1)

# Extra bottom row: for each resolution, the average of (ours - paper) over
# all augmentations, stored under our "mean" column; other cells stay NaN.
df_tmp.loc["mean_diff", :] = np.nan
for dim in (32, 64, 1500):
    # The paper columns are keyed by the resolution as a string.
    gap = df_tmp[("ours", dim, "mean")] - df_tmp[("imc22-paper", str(dim), "mean")]
    df_tmp.loc["mean_diff", ("ours", dim, "mean")] = gap.mean().round(2)

# Blank out the NaN cells for presentation and export.
rendered = df_tmp.fillna("")
display(rendered)
rendered.to_csv(
    "table3_ucdavis-icdm19_comparing_data_augmentations_functions_test_on_human.csv"
)
=== test on human ===
imc22-paper ours
32 64 1500 32 64 1500
mean mean mean mean ci95 mean ci95 mean ci95
aug_name
No augmentation 92.4 85.6 73.3 68.84 1.45 69.08 1.35 69.32 1.63
Rotate 93.73 87.07 77.3 71.65 1.98 71.08 1.51 68.19 0.97
Horizontal flip 94.67 79.33 87.9 69.40 1.63 70.52 2.03 73.90 1.06
Color jitter 82.93 74.93 68.0 68.43 2.82 70.20 1.99 69.08 1.72
Packet loss 90.93 85.6 84.0 70.68 1.35 71.33 1.45 71.08 1.13
Time shift 92.8 87.33 77.3 70.36 1.63 71.89 1.59 71.08 1.33
Change rtt 96.4 88.6 90.7 70.76 1.99 71.49 1.59 71.97 1.08
mean_diff -21.96 -13.27 -9.13
print("=== test on leftover ===")
# Only our own numbers are shown and exported for this split.
leftover_csv = (
    "table3_ucdavis-icdm19_comparing_data_augmentations_functions_test_on_leftover.csv"
)
display(df_ours_leftover)
df_ours_leftover.to_csv(leftover_csv)
=== test on leftover ===
ours
32 64 1500
mean ci95 mean ci95 mean ci95
aug_name
No augmentation 95.78 0.29 96.09 0.38 95.79 0.51
Rotate 96.74 0.35 97.00 0.38 95.79 0.31
Horizontal flip 95.68 0.40 96.32 0.59 95.97 0.80
Color jitter 96.93 0.56 96.46 0.46 95.47 0.49
Packet loss 96.99 0.39 97.25 0.39 96.84 0.49
Time shift 97.02 0.50 97.51 0.46 97.67 0.29
Change rtt 98.38 0.18 97.97 0.39 98.19 0.22