"""rep-12: share of rows with exactly one / more than ten distinct destinations.

Reads the CSV exported by go-flows (``Ex2flows_team13.csv``) and prints the
dataset length plus the percentage of rows whose
``distinct(destinationIPAddress)`` value is exactly 1 and the percentage whose
value is greater than 10.

NOTE(review): the accompanying README describes each row as one *source*;
that depends on the flow key configured in pcap2flows.json - confirm.
"""
import pandas as pd

# Location of the go-flows export, relative to the directory the script runs in.
CSV_PATH = r'../data/Ex2flows_team13.csv'

# Column holding the number of distinct destination addresses per row.
DST_COUNT_COLUMN = 'distinct(destinationIPAddress)'


def destination_percentages(df):
    """Return ``(single_pct, more_than_10_pct)`` for the rows of *df*.

    Args:
        df: DataFrame containing the ``distinct(destinationIPAddress)`` column.

    Returns:
        A tuple of two plain Python floats in the range 0..100: the percentage
        of rows with exactly one distinct destination and the percentage with
        more than ten. Both are ``0.0`` for an empty frame, so an empty export
        does not raise ``ZeroDivisionError``.

    Raises:
        KeyError: if *df* has rows but lacks the destination-count column.
    """
    total = len(df)
    if total == 0:
        return 0.0, 0.0

    dst_counts = df[DST_COUNT_COLUMN]
    # Summing a boolean mask counts the matches without materialising a
    # filtered copy of the frame; int() keeps the results plain Python numbers
    # so the printed values format exactly like the original script's.
    single = int((dst_counts == 1).sum())
    more_than_10 = int((dst_counts > 10).sum())
    return single / total * 100, more_than_10 / total * 100


def main():
    """Load the CSV export and print the statistics shown in the README."""
    df = pd.read_csv(CSV_PATH)
    pct_single, pct_more_than_10 = destination_percentages(df)

    # The README's documented output starts with this line; the script
    # previously never printed it.
    print("Length of dataset: {}".format(len(df)))
    print("Single Destination: {} %".format(round(pct_single, 3)))
    print("More than 10 destinations: {} %".format(round(pct_more_than_10, 3)))


if __name__ == "__main__":
    main()