{
"cells": [
{
"cell_type": "markdown",
"id": "305e1928-81a8-4a17-9166-339e25835705",
"metadata": {},
"source": [
"# Table 2 : Datasets properties"
]
},
{
"cell_type": "markdown",
"id": "acb408e0-74e5-49d7-9640-24aa83a99018",
"metadata": {},
"source": [
"[:simple-jupyter: :material-download:](/papers/imc23/notebooks/table2_datasets_properties.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "f6a0b7d4-7575-44e6-8777-0d3c4edb5e99",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:52:27.576339Z",
"iopub.status.busy": "2023-09-08T10:52:27.575973Z",
"iopub.status.idle": "2023-09-08T10:52:27.580116Z",
"shell.execute_reply": "2023-09-08T10:52:27.579360Z",
"shell.execute_reply.started": "2023-09-08T10:52:27.576310Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import tcbench as tcb"
]
},
{
"cell_type": "markdown",
"id": "0e4a9e81-2453-421f-bae2-440b788a201a",
"metadata": {},
"source": [
"## ucdavis-icdm19"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "5150b4c8-c0db-46a2-81ae-7b5622d68009",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:52:28.164815Z",
"iopub.status.busy": "2023-09-08T10:52:28.164480Z",
"iopub.status.idle": "2023-09-08T10:52:32.027431Z",
"shell.execute_reply": "2023-09-08T10:52:32.026564Z",
"shell.execute_reply.started": "2023-09-08T10:52:28.164788Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classes | \n",
" flows_all | \n",
" flows_min | \n",
" flows_max | \n",
" rho | \n",
" mean_pkts | \n",
"
\n",
" \n",
" partition | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" pretraining | \n",
" 5 | \n",
" 6439 | \n",
" 592 | \n",
" 1915 | \n",
" 3.2 | \n",
" 6653.0 | \n",
"
\n",
" \n",
" retraining-human-triggered | \n",
" 5 | \n",
" 83 | \n",
" 15 | \n",
" 20 | \n",
" 1.3 | \n",
" 7666.0 | \n",
"
\n",
" \n",
" retraining-script-triggered | \n",
" 5 | \n",
" 150 | \n",
" 30 | \n",
" 30 | \n",
" 1.0 | \n",
" 7131.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classes flows_all flows_min flows_max rho \\\n",
"partition \n",
"pretraining 5 6439 592 1915 3.2 \n",
"retraining-human-triggered 5 83 15 20 1.3 \n",
"retraining-script-triggered 5 150 30 30 1.0 \n",
"\n",
" mean_pkts \n",
"partition \n",
"pretraining 6653.0 \n",
"retraining-human-triggered 7666.0 \n",
"retraining-script-triggered 7131.0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df = tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19)\n",
"\n",
"# add number of packets\n",
"df = df.assign(packets=df[\"pkts_size\"].apply(len))\n",
"\n",
"# number of samples\n",
"df_tmp = pd.DataFrame(\n",
" df.groupby([\"partition\", \"app\"])[\"app\"].value_counts()\n",
").reset_index()\n",
"df_tmp = df_tmp.pivot(index=\"partition\", columns=\"app\", values=\"count\")\n",
"df_tmp = df_tmp.assign(\n",
" count=df_tmp.sum(axis=1),\n",
" flows_min=df_tmp.min(axis=1),\n",
" flows_max=df_tmp.max(axis=1),\n",
" rho=(df_tmp.max(axis=1) / df_tmp.min(axis=1)).round(1),\n",
" classes=len(df[\"app\"].cat.categories),\n",
")\n",
"\n",
"# mean pkts per flow\n",
"mean_pkts = df.groupby(\"partition\")[\"packets\"].mean().round(0)\n",
"mean_pkts.name = \"mean_pkts\"\n",
"flows_all = df.groupby(\"partition\")[\"partition\"].count()\n",
"flows_all.name = \"flows_all\"\n",
"\n",
"# combining everything together\n",
"df_tmp = pd.concat((df_tmp, mean_pkts, flows_all), axis=1)\n",
"df_tmp = df_tmp[[\"classes\", \"flows_all\", \"flows_min\", \"flows_max\", \"rho\", \"mean_pkts\"]]\n",
"display(df_tmp)\n",
"\n",
"stats_ucdavis19 = df_tmp"
]
},
{
"cell_type": "markdown",
"id": "ed063f85-a3be-42f1-bde2-7a360b6e40a0",
"metadata": {},
"source": [
"## mirage19"
]
},
{
"cell_type": "markdown",
"id": "586e176e-5e19-44c7-9038-a09a825512b6",
"metadata": {},
"source": [
"The unfiltered version of the dataset has an extra class, which corresponds to `\"background\"` traffic"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "f84d72c1-9d8d-46cc-97c5-82d7d9ee1e45",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:52:32.029597Z",
"iopub.status.busy": "2023-09-08T10:52:32.029209Z",
"iopub.status.idle": "2023-09-08T10:52:59.954118Z",
"shell.execute_reply": "2023-09-08T10:52:59.952713Z",
"shell.execute_reply.started": "2023-09-08T10:52:32.029564Z"
}
},
"outputs": [],
"source": [
"# unfiltered\n",
"df = tcb.load_parquet(tcb.DATASETS.MIRAGE19)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_unfiltered = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"unfiltered\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "2ec589ec-1c24-43ff-bc82-d5515d38b7e3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:52:59.956364Z",
"iopub.status.busy": "2023-09-08T10:52:59.955911Z",
"iopub.status.idle": "2023-09-08T10:53:14.587000Z",
"shell.execute_reply": "2023-09-08T10:53:14.586068Z",
"shell.execute_reply.started": "2023-09-08T10:52:59.956325Z"
}
},
"outputs": [],
"source": [
"# min_pkts = 10\n",
"df = tcb.load_parquet(tcb.DATASETS.MIRAGE19, min_pkts=10)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_minpkts10 = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"min_pkts=10\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "dfa955c6-f808-467d-94ce-8ac8f855d999",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:53:14.590118Z",
"iopub.status.busy": "2023-09-08T10:53:14.589708Z",
"iopub.status.idle": "2023-09-08T10:53:14.603879Z",
"shell.execute_reply": "2023-09-08T10:53:14.603101Z",
"shell.execute_reply.started": "2023-09-08T10:53:14.590085Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classes | \n",
" flows_all | \n",
" flows_min | \n",
" flows_max | \n",
" rho | \n",
" mean_pkts | \n",
"
\n",
" \n",
" \n",
" \n",
" unfiltered | \n",
" 21 | \n",
" 122007 | \n",
" 1986 | \n",
" 11737 | \n",
" 5.9 | \n",
" 23.0 | \n",
"
\n",
" \n",
" min_pkts=10 | \n",
" 20 | \n",
" 64172 | \n",
" 1013 | \n",
" 7505 | \n",
" 7.4 | \n",
" 17.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classes flows_all flows_min flows_max rho mean_pkts\n",
"unfiltered 21 122007 1986 11737 5.9 23.0\n",
"min_pkts=10 20 64172 1013 7505 7.4 17.0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_tmp = pd.concat((df_unfiltered, df_minpkts10), axis=0)\n",
"display(df_tmp)\n",
"stats_mirage19 = df_tmp"
]
},
{
"cell_type": "markdown",
"id": "0d91627a-3eaf-4f86-9cb8-37ec1f28e35d",
"metadata": {},
"source": [
"## mirage22"
]
},
{
"cell_type": "markdown",
"id": "93eff8ad-39df-40f1-b9e2-009902ce6f5b",
"metadata": {},
"source": [
"The unfiltered version of the dataset has an extra class, which corresponds to `\"background\"` traffic"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "ddc67fea-a5e5-4e8f-a35e-7bf2810728a1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:53:14.605364Z",
"iopub.status.busy": "2023-09-08T10:53:14.604988Z",
"iopub.status.idle": "2023-09-08T10:53:50.787877Z",
"shell.execute_reply": "2023-09-08T10:53:50.786963Z",
"shell.execute_reply.started": "2023-09-08T10:53:14.605332Z"
}
},
"outputs": [],
"source": [
"# unfiltered\n",
"df = tcb.load_parquet(tcb.DATASETS.MIRAGE22)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_unfiltered = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"unfiltered\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "ccb1e098-424e-449a-be52-bc6d5361088b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:53:50.789592Z",
"iopub.status.busy": "2023-09-08T10:53:50.789182Z",
"iopub.status.idle": "2023-09-08T10:54:00.911523Z",
"shell.execute_reply": "2023-09-08T10:54:00.910618Z",
"shell.execute_reply.started": "2023-09-08T10:53:50.789560Z"
}
},
"outputs": [],
"source": [
"# min_pkts = 10\n",
"df = tcb.load_parquet(tcb.DATASETS.MIRAGE22, min_pkts=10)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_minpkts10 = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"min_pkts=10\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "5109cc5e-c900-4f16-be15-fea9151df7c0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:00.913235Z",
"iopub.status.busy": "2023-09-08T10:54:00.912829Z",
"iopub.status.idle": "2023-09-08T10:54:08.278615Z",
"shell.execute_reply": "2023-09-08T10:54:08.277233Z",
"shell.execute_reply.started": "2023-09-08T10:54:00.913200Z"
}
},
"outputs": [],
"source": [
"# min_pkts = 1000\n",
"df = tcb.load_parquet(tcb.DATASETS.MIRAGE22, min_pkts=1000)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_minpkts1000 = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"min_pkts=1000\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "99cc5482-de63-45c2-822b-4a75010174ad",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:08.280934Z",
"iopub.status.busy": "2023-09-08T10:54:08.280378Z",
"iopub.status.idle": "2023-09-08T10:54:08.296172Z",
"shell.execute_reply": "2023-09-08T10:54:08.295396Z",
"shell.execute_reply.started": "2023-09-08T10:54:08.280893Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classes | \n",
" flows_all | \n",
" flows_min | \n",
" flows_max | \n",
" rho | \n",
" mean_pkts | \n",
"
\n",
" \n",
" \n",
" \n",
" unfiltered | \n",
" 10 | \n",
" 59071 | \n",
" 2252 | \n",
" 18882 | \n",
" 8.4 | \n",
" 3068.0 | \n",
"
\n",
" \n",
" min_pkts=10 | \n",
" 9 | \n",
" 26773 | \n",
" 970 | \n",
" 4437 | \n",
" 4.6 | \n",
" 6598.0 | \n",
"
\n",
" \n",
" min_pkts=1000 | \n",
" 9 | \n",
" 4569 | \n",
" 190 | \n",
" 2220 | \n",
" 11.7 | \n",
" 38321.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classes flows_all flows_min flows_max rho mean_pkts\n",
"unfiltered 10 59071 2252 18882 8.4 3068.0\n",
"min_pkts=10 9 26773 970 4437 4.6 6598.0\n",
"min_pkts=1000 9 4569 190 2220 11.7 38321.0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_tmp = pd.concat((df_unfiltered, df_minpkts10, df_minpkts1000), axis=0)\n",
"display(df_tmp)\n",
"stats_mirage22 = df_tmp"
]
},
{
"cell_type": "markdown",
"id": "cd153f2b-1173-44e9-b178-ea2357ca5221",
"metadata": {},
"source": [
"## utmobilenet21"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "c24356c3-5af3-45eb-8d17-77a0ddd7e3da",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:08.298048Z",
"iopub.status.busy": "2023-09-08T10:54:08.297349Z",
"iopub.status.idle": "2023-09-08T10:54:09.717282Z",
"shell.execute_reply": "2023-09-08T10:54:09.716359Z",
"shell.execute_reply.started": "2023-09-08T10:54:08.298016Z"
}
},
"outputs": [],
"source": [
"# unfiltered\n",
"df = tcb.load_parquet(tcb.DATASETS.UTMOBILENET21)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_unfiltered = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"unfiltered\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "293e5eae-b66f-4c1b-bb90-551f588c02b2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:09.721136Z",
"iopub.status.busy": "2023-09-08T10:54:09.720658Z",
"iopub.status.idle": "2023-09-08T10:54:10.966771Z",
"shell.execute_reply": "2023-09-08T10:54:10.965872Z",
"shell.execute_reply.started": "2023-09-08T10:54:09.721103Z"
}
},
"outputs": [],
"source": [
"# unfiltered\n",
"df = tcb.load_parquet(tcb.DATASETS.UTMOBILENET21, min_pkts=10)\n",
"\n",
"ser = df[\"app\"].value_counts()\n",
"df_minpkts10 = pd.DataFrame(\n",
" [\n",
" dict(\n",
" classes=len(ser),\n",
" flows_all=ser.sum(),\n",
" flows_min=ser.min(),\n",
" flows_max=ser.max(),\n",
" rho=(ser.max() / ser.min()).round(1),\n",
" mean_pkts=df[\"packets\"].mean().round(0),\n",
" )\n",
" ],\n",
" index=[\"minpkts=10\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "18ea2b9d-7233-4e00-911e-fcf734cde1fa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:10.968434Z",
"iopub.status.busy": "2023-09-08T10:54:10.968032Z",
"iopub.status.idle": "2023-09-08T10:54:10.981438Z",
"shell.execute_reply": "2023-09-08T10:54:10.980665Z",
"shell.execute_reply.started": "2023-09-08T10:54:10.968402Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" classes | \n",
" flows_all | \n",
" flows_min | \n",
" flows_max | \n",
" rho | \n",
" mean_pkts | \n",
"
\n",
" \n",
" \n",
" \n",
" unfiltered | \n",
" 17 | \n",
" 34378 | \n",
" 159 | \n",
" 5591 | \n",
" 35.2 | \n",
" 664.0 | \n",
"
\n",
" \n",
" minpkts=10 | \n",
" 14 | \n",
" 9460 | \n",
" 130 | \n",
" 2496 | \n",
" 19.2 | \n",
" 2366.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classes flows_all flows_min flows_max rho mean_pkts\n",
"unfiltered 17 34378 159 5591 35.2 664.0\n",
"minpkts=10 14 9460 130 2496 19.2 2366.0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_tmp = pd.concat((df_unfiltered, df_minpkts10), axis=0)\n",
"display(df_tmp)\n",
"stats_utmobilenet21 = df_tmp"
]
},
{
"cell_type": "markdown",
"id": "a3948ee4-15e5-4b4e-b051-bd68dc33bd7c",
"metadata": {},
"source": [
"# alltogether"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "c39fba26-1ef1-455a-a31b-7ba45e4cf636",
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-08T10:54:10.982937Z",
"iopub.status.busy": "2023-09-08T10:54:10.982569Z",
"iopub.status.idle": "2023-09-08T10:54:11.017303Z",
"shell.execute_reply": "2023-09-08T10:54:11.016525Z",
"shell.execute_reply.started": "2023-09-08T10:54:10.982906Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" classes | \n",
" flows_all | \n",
" flows_min | \n",
" flows_max | \n",
" rho | \n",
" mean_pkts | \n",
"
\n",
" \n",
" dataset | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" ucdavis-icdm19 | \n",
" pretraining | \n",
" 5 | \n",
" 6439 | \n",
" 592 | \n",
" 1915 | \n",
" 3.2 | \n",
" 6653.0 | \n",
"
\n",
" \n",
" human | \n",
" 5 | \n",
" 83 | \n",
" 15 | \n",
" 20 | \n",
" 1.3 | \n",
" 7666.0 | \n",
"
\n",
" \n",
" script | \n",
" 5 | \n",
" 150 | \n",
" 30 | \n",
" 30 | \n",
" 1.0 | \n",
" 7131.0 | \n",
"
\n",
" \n",
" mirage19 | \n",
" unfiltered | \n",
" 21 | \n",
" 122007 | \n",
" 1986 | \n",
" 11737 | \n",
" 5.9 | \n",
" 23.0 | \n",
"
\n",
" \n",
" min_pkts=10 | \n",
" 20 | \n",
" 64172 | \n",
" 1013 | \n",
" 7505 | \n",
" 7.4 | \n",
" 17.0 | \n",
"
\n",
" \n",
" mirage22 | \n",
" unfiltered | \n",
" 10 | \n",
" 59071 | \n",
" 2252 | \n",
" 18882 | \n",
" 8.4 | \n",
" 3068.0 | \n",
"
\n",
" \n",
" min_pkts=10 | \n",
" 9 | \n",
" 26773 | \n",
" 970 | \n",
" 4437 | \n",
" 4.6 | \n",
" 6598.0 | \n",
"
\n",
" \n",
" min_pkts=1000 | \n",
" 9 | \n",
" 4569 | \n",
" 190 | \n",
" 2220 | \n",
" 11.7 | \n",
" 38321.0 | \n",
"
\n",
" \n",
" utmobilenet21 | \n",
" unfiltered | \n",
" 17 | \n",
" 34378 | \n",
" 159 | \n",
" 5591 | \n",
" 35.2 | \n",
" 664.0 | \n",
"
\n",
" \n",
" minpkts=10 | \n",
" 14 | \n",
" 9460 | \n",
" 130 | \n",
" 2496 | \n",
" 19.2 | \n",
" 2366.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" classes flows_all flows_min flows_max rho \\\n",
"dataset \n",
"ucdavis-icdm19 pretraining 5 6439 592 1915 3.2 \n",
" human 5 83 15 20 1.3 \n",
" script 5 150 30 30 1.0 \n",
"mirage19 unfiltered 21 122007 1986 11737 5.9 \n",
" min_pkts=10 20 64172 1013 7505 7.4 \n",
"mirage22 unfiltered 10 59071 2252 18882 8.4 \n",
" min_pkts=10 9 26773 970 4437 4.6 \n",
" min_pkts=1000 9 4569 190 2220 11.7 \n",
"utmobilenet21 unfiltered 17 34378 159 5591 35.2 \n",
" minpkts=10 14 9460 130 2496 19.2 \n",
"\n",
" mean_pkts \n",
"dataset \n",
"ucdavis-icdm19 pretraining 6653.0 \n",
" human 7666.0 \n",
" script 7131.0 \n",
"mirage19 unfiltered 23.0 \n",
" min_pkts=10 17.0 \n",
"mirage22 unfiltered 3068.0 \n",
" min_pkts=10 6598.0 \n",
" min_pkts=1000 38321.0 \n",
"utmobilenet21 unfiltered 664.0 \n",
" minpkts=10 2366.0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_tmp = pd.concat(\n",
" (\n",
" (stats_ucdavis19.assign(dataset=\"ucdavis-icdm19\")).set_index(\n",
" [\"dataset\", stats_ucdavis19.index]\n",
" ),\n",
" (stats_mirage19.assign(dataset=\"mirage19\")).set_index(\n",
" [\"dataset\", stats_mirage19.index]\n",
" ),\n",
" (stats_mirage22.assign(dataset=\"mirage22\")).set_index(\n",
" [\"dataset\", stats_mirage22.index]\n",
" ),\n",
" (stats_utmobilenet21.assign(dataset=\"utmobilenet21\")).set_index(\n",
" [\"dataset\", stats_utmobilenet21.index]\n",
" ),\n",
" )\n",
").rename(\n",
" {\n",
" \"retraining-human-triggered\": \"human\",\n",
" \"retraining-script-triggered\": \"script\",\n",
" },\n",
" axis=0,\n",
")\n",
"display(df_tmp)\n",
"df_tmp.to_csv(\"table2_datasets_properties.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}