netsec-lab/ex3/rep-17.py

170 lines
5.5 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the two traffic datasets. The first CSV column becomes the row index
# and every row that contains a missing value is discarded.
dataset_hourly = pd.read_csv('data/team13_monthly.csv', index_col=0)
dataset_hourly = dataset_hourly.dropna()
dataset_daily = pd.read_csv('data/global_last10years.csv', index_col=0)
dataset_daily = dataset_daily.dropna()

# (table column label, DataFrame reduction method) pairs shared by both
# summary tables; the order here is the column order of the printed tables.
_SUMMARY_STATS = (
    ('total sum', 'sum'),
    ('mean', 'mean'),
    ('median', 'median'),
    ('standard deviation', 'std'),
)

# Table A: per-column statistics of the hourly dataset, expressed in
# millions and rounded to one decimal place.
table_a_info = {
    label: getattr(dataset_hourly, method)()
    for label, method in _SUMMARY_STATS
}
table_a = pd.DataFrame(table_a_info)
table_a = table_a.set_index(dataset_hourly.columns)
table_a = (table_a / 1000000).round(1)
print(table_a)

# Restrict the daily dataset to the index range spanned by the hourly
# dataset (both bounds inclusive).
# NOTE(review): this compares raw index values, so it presumably relies on
# both files using the same index/timestamp format - confirm against the data.
hourly_first = dataset_hourly.index[0]
hourly_last = dataset_hourly.index[-1]
within_hourly_range = (
    (dataset_daily.index >= hourly_first)
    & (dataset_daily.index <= hourly_last)
)
dataset_daily_filtered = dataset_daily[within_hourly_range]

# Table B: the same statistics for the filtered daily dataset, again in
# millions with one decimal place.
table_b_info = {
    label: getattr(dataset_daily_filtered, method)()
    for label, method in _SUMMARY_STATS
}
table_b = pd.DataFrame(table_b_info)
table_b = table_b.set_index(dataset_daily.columns)
table_b = (table_b / 1000000).round(1)
print(table_b)
def _save_histogram(values, bins, xlabel, title, path, xticks=None):
    """Draw one histogram on its own figure, save it to *path*, then show it.

    Parameters:
        values: the (already scaled) data to bin.
        bins: number of bins handed to ``plt.hist``.
        xlabel: label for the x axis; the y axis is always 'Frequency'.
        title: figure title.
        path: file the figure is written to.
        xticks: optional explicit x tick positions; matplotlib's default
            ticks are kept when omitted.
    """
    # A dedicated figure per plot keeps histograms from piling up on the
    # same axes when show() does not close the figure (non-blocking or
    # non-interactive backends).
    figure = plt.figure()
    plt.hist(values, bins=bins)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    if xticks is not None:
        plt.xticks(xticks)
    plt.grid()
    plt.title(title)
    # Save before show(): once a blocking show() returns the figure has been
    # closed and savefig() would write an empty image.
    plt.savefig(path)
    plt.show()
    plt.close(figure)


# Bytes histograms, plotted in gigabytes (raw value / 10**9).
_save_histogram(
    dataset_hourly['#bytes'] / 1000000000,
    bins=30,
    xlabel='GB/h',
    title='Histogram of gigabytes per hour during one month',
    path='figures/bytes_hourly_hist.png',
    xticks=np.arange(1, 2.6, 0.1),
)
_save_histogram(
    dataset_daily['# Bytes'] / 1000000000,
    bins=40,
    xlabel='GB/day',
    title='Histogram of gigabytes per day during 10 years',
    path='figures/bytes_daily_hist.png',
    xticks=np.arange(0, 5.5, 0.2),
)

# Packets histograms, plotted in millions of packets.
_save_histogram(
    dataset_hourly['#packets'] / (1*10**6),
    bins=30,
    xlabel='#packets/h in millions',
    title='Histogram of packets per hour during one month',
    path='figures/packets_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Packets'] / (1*10**6),
    bins=30,
    xlabel='#packets/day in millions',
    title='Histogram of packets per day during 10 years',
    path='figures/packets_daily_hist.png',
)

# Unique source IP histograms, plotted in thousands.
_save_histogram(
    dataset_hourly['#unique_IP_sources'] / (1*10**3),
    bins=30,
    xlabel='#uIPs/h in thousands',
    title='Histogram of unique source IPs per hour during one month',
    path='figures/uIPs_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Unique Source IPs'] / (1*10**3),
    bins=30,
    xlabel='#uIPs/day in thousands',
    title='Histogram of unique source IPs per day during 10 years',
    path='figures/uIPs_daily_hist.png',
)

# Unique destination IP histograms, plotted in millions.
_save_histogram(
    dataset_hourly['#unique_IP_destinations'] / (1*10**6),
    bins=30,
    xlabel='#uIPd/h in millions',
    title='Histogram of unique destination IPs per hour during one month',
    path='figures/uIPd_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Unique Destination IPs'] / (1*10**6),
    bins=30,
    # Destination IPs use the 'uIPd' abbreviation ('uIPs' is source IPs).
    xlabel='#uIPd/day in millions',
    title='Histogram of unique destination IPs per day during 10 years',
    path='figures/uIPd_daily_hist.png',
)
def _save_boxplot(values, ylabel, title, path):
    """Draw one boxplot on its own figure, save it to *path*, then show it.

    Parameters:
        values: the (already scaled) data to summarise.
        ylabel: label for the y axis.
        title: figure title.
        path: file the figure is written to.
    """
    # A dedicated figure per plot keeps boxplots from being drawn over each
    # other when show() does not close the figure.
    figure = plt.figure()
    plt.boxplot(values)
    plt.ylabel(ylabel)
    plt.grid()
    plt.title(title)
    # savefig() must come before show(): once a blocking show() returns the
    # figure has been closed, and saving afterwards writes an empty image.
    plt.savefig(path)
    plt.show()
    plt.close(figure)


# Bytes boxplots, plotted in gigabytes (raw value / 10**9).
_save_boxplot(
    dataset_hourly['#bytes'] / (1*10**9),
    ylabel='GB/h',
    title='Boxplot of gigabytes per hour during one month',
    path='figures/bytes_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Bytes'] / (1*10**9),
    ylabel='GB/day',
    title='Boxplot of gigabytes per day during 10 years',
    path='figures/bytes_daily_boxplot.png',
)

# Packets boxplots, plotted in millions of packets.
_save_boxplot(
    dataset_hourly['#packets'] / (1*10**6),
    ylabel='#packets/h in millions',
    title='Boxplot of packets per hour during one month',
    path='figures/packets_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Packets'] / (1*10**6),
    ylabel='#packets/day in millions',
    title='Boxplot of packets per day during 10 years',
    path='figures/packets_daily_boxplot.png',
)

# Unique source IP boxplots, plotted in thousands.
_save_boxplot(
    dataset_hourly['#unique_IP_sources'] / (1*10**3),
    ylabel='#uIPs/h in thousands',
    title='Boxplot of unique source IPs per hour during one month',
    path='figures/uIPs_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Unique Source IPs'] / (1*10**3),
    ylabel='#uIPs/day in thousands',
    title='Boxplot of unique source IPs per day during 10 years',
    path='figures/uIPs_daily_boxplot.png',
)

# Unique destination IP boxplots, plotted in millions.
_save_boxplot(
    dataset_hourly['#unique_IP_destinations'] / (1*10**6),
    ylabel='#uIPd/h in millions',
    title='Boxplot of unique destination IPs per hour during one month',
    path='figures/uIPd_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Unique Destination IPs'] / (1*10**6),
    ylabel='#uIPd/day in millions',
    title='Boxplot of unique destination IPs per day during 10 years',
    path='figures/uIPd_daily_boxplot.png',
)