# File-viewer metadata carried over from the original listing: 153 lines, 4.7 KiB, Python.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the two traffic datasets. The first CSV column becomes the row index,
# and any row containing a missing value is discarded.
dataset_hourly = pd.read_csv('csv/team13_monthly.csv', index_col=0)
dataset_hourly = dataset_hourly.dropna()

dataset_daily = pd.read_csv('csv/global_last10years.csv', index_col=0)
dataset_daily = dataset_daily.dropna()

# Table A: per-column summary statistics of the hourly dataset.
table_a_info = {}
table_a_info['total sum'] = dataset_hourly.sum()
table_a_info['mean'] = dataset_hourly.mean()
table_a_info['median'] = dataset_hourly.median()
table_a_info['standard deviation'] = dataset_hourly.std()

# One row per dataset column, one table column per statistic; every value is
# expressed in millions and rounded to one decimal place.
table_a = pd.DataFrame(table_a_info).set_index(dataset_hourly.columns)
table_a = table_a.div(1000000).round(1)
print(table_a)

# Restrict the daily dataset to the index range covered by the hourly dataset
# (both bounds inclusive).
# NOTE(review): the CSVs are read without date parsing, so if the index holds
# date strings this comparison is lexicographic - confirm both files share one
# timestamp format.
hourly_first = dataset_hourly.index[0]
hourly_last = dataset_hourly.index[-1]
within_hourly_range = (dataset_daily.index >= hourly_first) & (dataset_daily.index <= hourly_last)
dataset_daily_filtered = dataset_daily[within_hourly_range]

# Table B: the same summary statistics as table A, computed on the daily
# dataset restricted to the hourly dataset's index range.
table_b_info = {}
table_b_info['total sum'] = dataset_daily_filtered.sum()
table_b_info['mean'] = dataset_daily_filtered.mean()
table_b_info['median'] = dataset_daily_filtered.median()
table_b_info['standard deviation'] = dataset_daily_filtered.std()

# Rows are labelled with the daily dataset's column names; values are in
# millions, rounded to one decimal place.
table_b = pd.DataFrame(table_b_info).set_index(dataset_daily.columns)
table_b = table_b.div(1000000).round(1)
print(table_b)

# Histogram of the hourly byte counts, scaled to gigabytes (10**9 bytes).
hourly_gigabytes = dataset_hourly['#bytes'] / 10**9
plt.hist(hourly_gigabytes, bins=30)
# Fixed x ticks every 0.1 GB, starting at 1 and stopping before 2.6.
plt.xticks(np.arange(1, 2.6, 0.1))
plt.title('Histogram of gigabytes per hour during one month')
plt.xlabel('GB/h')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the daily byte counts, scaled to gigabytes (10**9 bytes).
daily_gigabytes = dataset_daily['# Bytes'] / 10**9
plt.hist(daily_gigabytes, bins=40)
plt.title('Histogram of gigabytes per day during 10 years')
plt.xlabel('GB/day')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the hourly packet counts, scaled to millions.
hourly_mega_packets = dataset_hourly['#packets'] / 10**6
plt.hist(hourly_mega_packets, bins=30)
plt.title('Histogram of packets per hour during one month')
plt.xlabel('#packets/h in millions')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the daily packet counts, scaled to millions.
daily_mega_packets = dataset_daily['# Packets'] / 10**6
plt.hist(daily_mega_packets, bins=30)
plt.title('Histogram of packets per day during 10 years')
plt.xlabel('#packets/day in millions')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the hourly unique-source-IP counts, scaled to thousands.
hourly_kilo_sources = dataset_hourly['#unique_IP_sources'] / 10**3
plt.hist(hourly_kilo_sources, bins=30)
plt.title('Histogram of unique source IPs per hour during one month')
plt.xlabel('#uIPs/h in thousands')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the daily unique-source-IP counts, scaled to thousands.
daily_kilo_sources = dataset_daily['# Unique Source IPs'] / 10**3
plt.hist(daily_kilo_sources, bins=30)
plt.title('Histogram of unique source IPs per day during 10 years')
plt.xlabel('#uIPs/day in thousands')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the hourly unique-destination-IP counts, scaled to millions.
hourly_mega_destinations = dataset_hourly['#unique_IP_destinations'] / 10**6
plt.hist(hourly_mega_destinations, bins=30)
plt.title('Histogram of unique destination IPs per hour during one month')
plt.xlabel('#uIPd/h in millions')
plt.ylabel('Frequency')
plt.grid()
plt.show()

# Histogram of the daily unique-destination-IP counts, scaled to millions.
plt.hist(dataset_daily['# Unique Destination IPs'] / (1*10**6), bins=30)
# The axis shows destination IPs, so it uses the '#uIPd' abbreviation (as the
# other destination-IP plots do) rather than the source-IP one, '#uIPs'.
plt.xlabel('#uIPd/day in millions')
plt.ylabel('Frequency')
plt.grid()
plt.title('Histogram of unique destination IPs per day during 10 years')
plt.show()

# Box plot of the hourly byte counts, scaled to gigabytes (10**9 bytes).
hourly_gigabytes_box = dataset_hourly['#bytes'] / 10**9
plt.boxplot(hourly_gigabytes_box)
plt.title('Boxplot of gigabytes per hour during one month')
plt.ylabel('GB/h')
plt.grid()
plt.show()

# Box plot of the daily byte counts, scaled to gigabytes (10**9 bytes).
daily_gigabytes_box = dataset_daily['# Bytes'] / 10**9
plt.boxplot(daily_gigabytes_box)
plt.title('Boxplot of gigabytes per day during 10 years')
plt.ylabel('GB/day')
plt.grid()
plt.show()

# Box plot of the hourly packet counts, scaled to millions.
hourly_mega_packets_box = dataset_hourly['#packets'] / 10**6
plt.boxplot(hourly_mega_packets_box)
plt.title('Boxplot of packets per hour during one month')
# The x axis label is explicitly left blank for this plot.
plt.xlabel('')
plt.ylabel('#packets/h in millions')
plt.grid()
plt.show()

# Box plot of the daily packet counts, scaled to millions.
daily_mega_packets_box = dataset_daily['# Packets'] / 10**6
plt.boxplot(daily_mega_packets_box)
plt.title('Boxplot of packets per day during 10 years')
plt.ylabel('#packets/day in millions')
plt.grid()
plt.show()

# Box plot of the hourly unique-source-IP counts, scaled to thousands.
hourly_kilo_sources_box = dataset_hourly['#unique_IP_sources'] / 10**3
plt.boxplot(hourly_kilo_sources_box)
plt.title('Boxplot of unique source IPs per hour during one month')
plt.ylabel('#uIPs/h in thousands')
plt.grid()
plt.show()

# Box plot of the daily unique-source-IP counts, scaled to thousands.
daily_kilo_sources_box = dataset_daily['# Unique Source IPs'] / 10**3
plt.boxplot(daily_kilo_sources_box)
plt.title('Boxplot of unique source IPs per day during 10 years')
plt.ylabel('#uIPs/day in thousands')
plt.grid()
plt.show()

# Box plot of the hourly unique-destination-IP counts, scaled to millions.
hourly_mega_destinations_box = dataset_hourly['#unique_IP_destinations'] / 10**6
plt.boxplot(hourly_mega_destinations_box)
plt.title('Boxplot of unique destination IPs per hour during one month')
plt.ylabel('#uIPd/h in millions')
plt.grid()
plt.show()

# Box plot of the daily unique-destination-IP counts, scaled to millions.
daily_mega_destinations_box = dataset_daily['# Unique Destination IPs'] / 10**6
plt.boxplot(daily_mega_destinations_box)
plt.title('Boxplot of unique destination IPs per day during 10 years')
plt.ylabel('#uIPd/day in millions')
plt.grid()
plt.show()