diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66850fe
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+data
+venv
+.idea
diff --git a/ex2/README.md b/ex2/README.md
index 55d38f9..a7f1a78 100644
--- a/ex2/README.md
+++ b/ex2/README.md
@@ -51,3 +51,40 @@ Output:
 41 2
 Name: protocolIdentifier, dtype: int64
 ```
+
+### rep-12
+
+After running the command
+
+`go-flows run features pcap2flows.json export csv Ex2flows_team13.csv source libpcap Ex2_team13.pcap`
+
+we get the file `Ex2flows_team13.csv`.
+
+The following Python script quickly extracts the
+percentage of sources communicating with exactly one or with more than ten destinations:
+
+```python
+import pandas as pd
+
+df = pd.read_csv(r'../data/Ex2flows_team13.csv')
+
+dataLength = len(df)
+
+singleDestinationFilter = df['distinct(destinationIPAddress)'] == 1
+moreThan10DestinationsFilter = df['distinct(destinationIPAddress)'] > 10
+
+percentageOfSingleDst = len(df[singleDestinationFilter]) / dataLength * 100
+percentageOfMoreThan10Dst = len(df[moreThan10DestinationsFilter]) / dataLength * 100
+
+print("Length of dataset: {}".format(dataLength))
+print("Single Destination: {} %".format(round(percentageOfSingleDst, 3)))
+print("More than 10 destinations: {} %".format(round(percentageOfMoreThan10Dst, 3)))
+```
+
+Output:
+
+```
+Length of dataset: 209434
+Single Destination: 94.901 %
+More than 10 destinations: 0.796 %
+```
diff --git a/ex2/rep-12.py b/ex2/rep-12.py
new file mode 100644
index 0000000..9158721
--- /dev/null
+++ b/ex2/rep-12.py
@@ -0,0 +1,22 @@
+"""Print the share of rows with exactly one / more than ten destinations.
+
+Reads the go-flows CSV export and prints the dataset length plus both
+shares as percentages of all rows.
+"""
+import pandas as pd
+
+df = pd.read_csv(r'../data/Ex2flows_team13.csv')
+
+dataLength = len(df)
+
+# Boolean masks on the 'distinct(destinationIPAddress)' column.
+singleDestinationFilter = df['distinct(destinationIPAddress)'] == 1
+moreThan10DestinationsFilter = df['distinct(destinationIPAddress)'] > 10
+
+percentageOfSingleDst = len(df[singleDestinationFilter]) / dataLength * 100
+percentageOfMoreThan10Dst = len(df[moreThan10DestinationsFilter]) / dataLength * 100
+
+# Printed first so the output matches the sample shown in ex2/README.md.
+print("Length of dataset: {}".format(dataLength))
+print("Single Destination: {} %".format(round(percentageOfSingleDst, 3)))
+print("More than 10 destinations: {} %".format(round(percentageOfMoreThan10Dst, 3)))