From 4d695a975540bb07858aff37c5eb349c2ad84527 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=BCnter=20Windsperger?= <e1302775@student.tuwien.ac.at>
Date: Fri, 7 May 2021 14:27:16 +0200
Subject: [PATCH] Add script for rep-12

---
 .gitignore    |  3 +++
 ex2/README.md | 36 ++++++++++++++++++++++++++++++++++++
 ex2/rep-12.py | 14 ++++++++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 ex2/rep-12.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66850fe
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+data
+venv
+.idea
\ No newline at end of file
diff --git a/ex2/README.md b/ex2/README.md
index 55d38f9..a7f1a78 100644
--- a/ex2/README.md
+++ b/ex2/README.md
@@ -51,3 +51,39 @@ Output:
 41          2
 Name: protocolIdentifier, dtype: int64
 ```
+
+### rep-12
+
+After running the command
+
+`go-flows run features pcap2flows.json export csv Ex2flows_team13.csv source libpcap Ex2_team13.pcap`
+
+we get the file `Ex2flows_team13.csv`.
+
+The following Python script computes the percentage of sources that communicate
+with exactly one destination and with more than ten destinations, respectively:
+
+```python
+import pandas as pd
+
+# CSV exported by the go-flows command above; the relative path is resolved against the working directory.
+df = pd.read_csv(r'../data/Ex2flows_team13.csv')
+dataLength = len(df)
+print("Length of dataset: {}".format(dataLength))  # first line of the Output block below
+
+singleDestinationFilter = df['distinct(destinationIPAddress)'] == 1
+moreThan10DestinationsFilter = df['distinct(destinationIPAddress)'] > 10
+# max(dataLength, 1): a CSV without data rows reports 0.0 % instead of raising ZeroDivisionError.
+percentageOfSingleDst = len(df[singleDestinationFilter]) / max(dataLength, 1) * 100
+percentageOfMoreThan10Dst = len(df[moreThan10DestinationsFilter]) / max(dataLength, 1) * 100
+print("Single Destination: {} %".format(round(percentageOfSingleDst, 3)))
+print("More than 10 destinations: {} %".format(round(percentageOfMoreThan10Dst, 3)))
+```
+
+Output:
+
+```
+Length of dataset: 209434
+Single Destination: 94.901 %
+More than 10 destinations: 0.796 %
+```
\ No newline at end of file
diff --git a/ex2/rep-12.py b/ex2/rep-12.py
new file mode 100644
index 0000000..9158721
--- /dev/null
+++ b/ex2/rep-12.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+# Flow CSV exported by go-flows; the relative path is resolved against the working directory.
+df = pd.read_csv(r'../data/Ex2flows_team13.csv')
+dataLength = len(df)
+print("Length of dataset: {}".format(dataLength))  # first line of the output documented in README.md
+
+singleDestinationFilter = df['distinct(destinationIPAddress)'] == 1
+moreThan10DestinationsFilter = df['distinct(destinationIPAddress)'] > 10
+# max(dataLength, 1): a CSV without data rows reports 0.0 % instead of raising ZeroDivisionError.
+percentageOfSingleDst = len(df[singleDestinationFilter]) / max(dataLength, 1) * 100
+percentageOfMoreThan10Dst = len(df[moreThan10DestinationsFilter]) / max(dataLength, 1) * 100
+print("Single Destination: {} %".format(round(percentageOfSingleDst, 3)))
+print("More than 10 destinations: {} %".format(round(percentageOfMoreThan10Dst, 3)))