import itertools
import pathlib
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.stats.api as sms
% matplotlib inline
% config InlineBackend . figure_format = 'retina'
# Maps raw augmentation identifiers to the labels shown in the figures.
# Insertion order is significant: the plotting code uses
# ``list(RENAME.values())`` to decide the left-to-right order of the bars.
RENAME = dict(
    noaug="No aug.",
    rotate="Rotate",
    colorjitter="Color jitter",
    horizontalflip="Horizontal flip",
    changertt="Change RTT",
    timeshift="Time shift",
    packetloss="Packet loss",
)
# Folder holding the campaign summary files (one parquet per flowpic resolution).
folder = pathlib.Path(
    "./campaigns/ucdavis-icdm19/augmentation-at-loading-dropout-impact/campaign_summary/augment-at-loading-dropout-impact/"
)

# Load the run summary of each flowpic resolution and stack them row-wise.
# The resolution is interpolated with no surrounding whitespace: any stray
# space inside the literal part of the f-string would become part of the
# file name and make the lookup fail.
df = pd.concat(
    [
        pd.read_parquet(folder / f"runsinfo_flowpic_dim_{flowpic_dim}.parquet")
        for flowpic_dim in (32, 1500)
    ]
)
# Narrow the run summaries down to the columns used by the analysis, discard
# the "test-train-val-leftover" split, and swap raw identifiers for their
# display labels (RENAME is applied to every matching value in the frame).
df = (
    df.loc[
        :,
        [
            "flowpic_dim",
            "test_split_name",
            "aug_name",
            "seed",
            "split_index",
            "acc",
            "with_dropout",
        ],
    ]
    .loc[lambda frame: frame["test_split_name"].ne("test-train-val-leftover")]
    .replace(RENAME)
)
# Split the runs by dropout setting and give the accuracy column a distinct
# name in each half, so both values can sit side by side after the merge.
# NOTE: "== True" / "== False" are element-wise pandas comparisons (not
# Python truthiness checks), so they are intentionally kept explicit.
df_with_dropout = (
    df[df["with_dropout"] == True]  # noqa: E712
    .drop(columns="with_dropout")
    .rename(columns={"acc": "withdropout_acc"})
)
df_no_dropout = (
    df[df["with_dropout"] == False]  # noqa: E712
    .drop(columns="with_dropout")
    .rename(columns={"acc": "nodropout_acc"})
)

# Pair every run with dropout with the run without dropout that shares the
# same resolution, test split, augmentation, seed and split index.
# No `suffixes` are passed: after the renames above the two frames share no
# column besides the join keys, so a suffix would never be applied.
df = pd.merge(
    df_with_dropout,
    df_no_dropout,
    on=[
        "flowpic_dim",
        "test_split_name",
        "aug_name",
        "seed",
        "split_index",
    ],
)

# Drop pairs whose no-dropout accuracy is missing. `dropna(subset=...)`
# selects rows by label; feeding index labels to the positional `.iloc`
# indexer is only correct while the index happens to be 0..n-1.
df = df.dropna(subset=["nodropout_acc"])

# Accuracy gain (positive) or loss (negative) attributable to dropout.
# `assign` returns a new frame, so no chained-assignment warning is raised
# on the filtered data.
df = df.assign(acc_diff=df["withdropout_acc"] - df["nodropout_acc"])
def compute_confidence_intervals(array, alpha=0.05):
    """Return the half-width of the t-based confidence interval of the mean.

    Args:
        array: Numeric samples; anything ``np.array`` can convert.
        alpha: Significance level forwarded to
            ``DescrStatsW.tconfint_mean`` (0.05 corresponds to a 95%
            interval).

    Returns:
        The distance between the upper confidence bound and the sample
        mean, i.e. the value to use as a symmetric error bar.
    """
    samples = np.array(array)
    # Only the upper bound is needed: the error bar is its offset from the mean.
    _, upper_bound = sms.DescrStatsW(samples).tconfint_mean(alpha)
    return upper_bound - samples.mean()
# Summarise the accuracy difference for every
# (flowpic resolution, test split, augmentation) combination.
df_merged = (
    df.groupby(["flowpic_dim", "test_split_name", "aug_name"])
    .agg(
        {
            "acc_diff": [
                "mean",
                "std",
                "count",
                "min",
                "max",
                compute_confidence_intervals,
            ]
        }
    )
    # The outer column level only repeats "acc_diff"; flatten it away.
    .droplevel(0, axis=1)
    # The custom aggregator is labelled with its function name by default.
    .rename(columns={"compute_confidence_intervals": "confidence_interval"})
)
# Larger base font so the figure stays legible when scaled down.
plt.rcParams.update({"font.size": 20})

# One panel per (flowpic resolution, test split) combination.
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(15, 6.5))
for ax, (flowpic_dim, test_split_name) in zip(
    axes.flatten(), itertools.product((32, 1500), ("test-human", "test-script"))
):
    # Select the statistics of this panel and order the augmentations as
    # they are listed in RENAME.
    df_tmp = df_merged.loc[(flowpic_dim, test_split_name)]
    df_tmp = df_tmp.loc[list(RENAME.values())]

    # Mean accuracy difference per augmentation, with the confidence
    # interval half-width as error bar.
    ax.bar(
        list(df_tmp.index),
        df_tmp["mean"],
        yerr=df_tmp["confidence_interval"],
        align="center",
        alpha=0.5,
        ecolor="black",
        capsize=10,
    )
    ax.set_title(f"{test_split_name}\n({flowpic_dim}x{flowpic_dim})")

    # The categorical bar plot already places one labelled tick per bar, so
    # only the label rotation has to change. Calling `set_xticklabels`
    # without fixing the tick positions first raises
    # "FixedFormatter should only be used together with FixedLocator".
    ax.tick_params(axis="x", labelrotation=90)

    # Same y-range in every panel so the bars are directly comparable.
    ax.set_ylim(-4.5, 4.5)
    ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(1))
    ax.grid(axis="y", which="both", linestyle=":")

plt.tight_layout()
plt.savefig("supervised_dropout_std.png", dpi=300, bbox_inches="tight")
# Captured stderr of the notebook cell above (one warning per subplot):
# /tmp/ipykernel_97694/3765097497.py:23: UserWarning: FixedFormatter should only be used together with FixedLocator
#   ax.set_xticklabels(list(df_tmp.index), rotation=90, ha="center")
# /tmp/ipykernel_97694/3765097497.py:23: UserWarning: FixedFormatter should only be used together with FixedLocator
#   ax.set_xticklabels(list(df_tmp.index), rotation=90, ha="center")
# /tmp/ipykernel_97694/3765097497.py:23: UserWarning: FixedFormatter should only be used together with FixedLocator
#   ax.set_xticklabels(list(df_tmp.index), rotation=90, ha="center")
# /tmp/ipykernel_97694/3765097497.py:23: UserWarning: FixedFormatter should only be used together with FixedLocator
#   ax.set_xticklabels(list(df_tmp.index), rotation=90, ha="center")