"""Print summary statistics and save distribution plots for two traffic datasets.

The script reads two CSV files, prints a table of sum / mean / median /
standard deviation for each (scaled to millions), and then writes one
histogram and one boxplot per metric and dataset into ``figures/``.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _summary_table(frame):
    """Return per-column sum, mean, median and std of *frame*, in millions.

    The result has one row per column of *frame* and is rounded to one
    decimal place.
    """
    stats = {
        'total sum': frame.sum(),
        'mean': frame.mean(),
        'median': frame.median(),
        'standard deviation': frame.std(),
    }
    # Each statistic is a Series indexed by column name, so the resulting
    # frame is already indexed by the dataset's columns.
    return (pd.DataFrame(stats) / 1000000).round(1), stats


def _finish_figure(fig, path):
    """Save *fig* to *path*, display it, and release it.

    The figure is saved BEFORE ``plt.show()``: on interactive backends the
    figure is gone once ``show()`` returns, so saving afterwards would write
    an empty image. Closing afterwards keeps the next plot from being drawn
    on top of this one on non-interactive backends.
    """
    fig.savefig(path)
    plt.show()
    plt.close(fig)


def _save_histogram(values, bins, xlabel, title, path, xticks=None):
    """Draw a histogram of *values* on its own figure and save it to *path*.

    *xticks*, when given, overrides the automatically chosen x tick positions.
    """
    fig, ax = plt.subplots()
    ax.hist(values, bins=bins)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    if xticks is not None:
        ax.set_xticks(xticks)
    ax.grid(True)
    ax.set_title(title)
    _finish_figure(fig, path)


def _save_boxplot(values, ylabel, title, path):
    """Draw a boxplot of *values* on its own figure and save it to *path*."""
    fig, ax = plt.subplots()
    ax.boxplot(values)
    ax.set_ylabel(ylabel)
    ax.grid(True)
    ax.set_title(title)
    _finish_figure(fig, path)


# Rows containing any missing value are discarded from both datasets.
dataset_hourly = pd.read_csv('data/team13_monthly.csv', index_col=0).dropna()
dataset_daily = pd.read_csv('data/global_last10years.csv', index_col=0).dropna()

table_a, table_a_info = _summary_table(dataset_hourly)
print(table_a)

# Filter daily dataset by timestamp range of hourly dataset
# NOTE(review): the comparison is done on the raw index values as read from
# the CSVs -- confirm both files use the same, sortable timestamp format.
first_stamp = dataset_hourly.index[0]
last_stamp = dataset_hourly.index[-1]
in_hourly_range = (dataset_daily.index >= first_stamp) & (dataset_daily.index <= last_stamp)
dataset_daily_filtered = dataset_daily[in_hourly_range]

table_b, table_b_info = _summary_table(dataset_daily_filtered)
print(table_b)

# savefig does not create missing directories.
os.makedirs('figures', exist_ok=True)

# --- Histograms -----------------------------------------------------------

_save_histogram(
    dataset_hourly['#bytes'] / 1000000000,
    bins=30,
    xlabel='GB/h',
    title='Histogram of gigabytes per hour during one month',
    path='figures/bytes_hourly_hist.png',
    xticks=np.arange(1, 2.6, 0.1),
)
_save_histogram(
    dataset_daily['# Bytes'] / 1000000000,
    bins=40,
    xlabel='GB/day',
    title='Histogram of gigabytes per day during 10 years',
    path='figures/bytes_daily_hist.png',
    xticks=np.arange(0, 5.5, 0.2),
)
_save_histogram(
    dataset_hourly['#packets'] / 10**6,
    bins=30,
    xlabel='#packets/h in millions',
    title='Histogram of packets per hour during one month',
    path='figures/packets_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Packets'] / 10**6,
    bins=30,
    xlabel='#packets/day in millions',
    title='Histogram of packets per day during 10 years',
    path='figures/packets_daily_hist.png',
)
_save_histogram(
    dataset_hourly['#unique_IP_sources'] / 10**3,
    bins=30,
    xlabel='#uIPs/h in thousands',
    title='Histogram of unique source IPs per hour during one month',
    path='figures/uIPs_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Unique Source IPs'] / 10**3,
    bins=30,
    xlabel='#uIPs/day in thousands',
    title='Histogram of unique source IPs per day during 10 years',
    path='figures/uIPs_daily_hist.png',
)
_save_histogram(
    dataset_hourly['#unique_IP_destinations'] / 10**6,
    bins=30,
    xlabel='#uIPd/h in millions',
    title='Histogram of unique destination IPs per hour during one month',
    path='figures/uIPd_hourly_hist.png',
)
_save_histogram(
    dataset_daily['# Unique Destination IPs'] / 10**6,
    bins=30,
    # "uIPd" (destinations), matching the hourly plot and both boxplots.
    xlabel='#uIPd/day in millions',
    title='Histogram of unique destination IPs per day during 10 years',
    path='figures/uIPd_daily_hist.png',
)

# --- Boxplots -------------------------------------------------------------

_save_boxplot(
    dataset_hourly['#bytes'] / 10**9,
    ylabel='GB/h',
    title='Boxplot of gigabytes per hour during one month',
    path='figures/bytes_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Bytes'] / 10**9,
    ylabel='GB/day',
    title='Boxplot of gigabytes per day during 10 years',
    path='figures/bytes_daily_boxplot.png',
)
_save_boxplot(
    dataset_hourly['#packets'] / 10**6,
    ylabel='#packets/h in millions',
    title='Boxplot of packets per hour during one month',
    path='figures/packets_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Packets'] / 10**6,
    ylabel='#packets/day in millions',
    title='Boxplot of packets per day during 10 years',
    path='figures/packets_daily_boxplot.png',
)
_save_boxplot(
    dataset_hourly['#unique_IP_sources'] / 10**3,
    ylabel='#uIPs/h in thousands',
    title='Boxplot of unique source IPs per hour during one month',
    path='figures/uIPs_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Unique Source IPs'] / 10**3,
    ylabel='#uIPs/day in thousands',
    title='Boxplot of unique source IPs per day during 10 years',
    path='figures/uIPs_daily_boxplot.png',
)
_save_boxplot(
    dataset_hourly['#unique_IP_destinations'] / 10**6,
    ylabel='#uIPd/h in millions',
    title='Boxplot of unique destination IPs per hour during one month',
    path='figures/uIPd_hourly_boxplot.png',
)
_save_boxplot(
    dataset_daily['# Unique Destination IPs'] / 10**6,
    ylabel='#uIPd/day in millions',
    title='Boxplot of unique destination IPs per day during 10 years',
    path='figures/uIPd_daily_boxplot.png',
)