"""FFT amplitude-spectrum analysis of two traffic time series.

Reads ``csv/team13_protocol.csv``, indexes it by its ``timestamp`` column
(interpreted as Unix seconds) and, for the packet-count and unique-source-IP
columns of protocol 6 (labelled "TCP" and "per hour" in the output strings):

* replaces zero samples (including NaNs, which are filled with 0 on load)
  with the series median,
* plots the one-sided FFT amplitude spectrum,
* prints the strongest non-DC component: its amplitude, its bin index k and
  the corresponding period ``n / k`` in samples,

and finally plots the two raw time series.
"""

import math as m
import statistics as stat

import numpy as np
import pandas as pd

CSV_PATH = 'csv/team13_protocol.csv'
PACKETS_COLUMN = '6 · # Packets'
UNIQUE_IPS_COLUMN = '6 · # Unique Source IPs'


def load_dataset(path=CSV_PATH):
    """Load the CSV at *path*, fill NaNs with 0 and index it by timestamp.

    The ``timestamp`` column is parsed as seconds since the Unix epoch.
    """
    dataset = pd.read_csv(path).fillna(0)
    dataset['timestamp'] = pd.to_datetime(dataset['timestamp'], unit='s')
    return dataset.set_index('timestamp')


def fill_zeros_with_median(values):
    """Return a new list where every zero in *values* is the series median.

    The median is taken over the whole input, zeros included. An empty input
    yields an empty list instead of raising ``StatisticsError``.
    """
    if not values:
        return []
    median = stat.median(values)
    return [value if value != 0 else median for value in values]


def amplitude_spectrum(values):
    """Return ``(k, amplitude)`` arrays for FFT bins ``1 .. floor(n/2) - 1``.

    Bin 0 (the DC term) is skipped, and only the first half of the spectrum
    is kept because the FFT magnitude of a real signal is mirrored around
    ``n / 2``. Both arrays are empty when *values* has fewer than 4 samples.
    """
    n = len(values)
    if n == 0:
        # np.fft.fft rejects empty input; report an empty spectrum instead.
        return np.arange(0), np.zeros(0)
    half = m.floor(n / 2)
    amplitude = np.abs(np.fft.fft(values))
    return np.arange(1, half), amplitude[1:half]


def dominant_component(values):
    """Return ``(k, amplitude, period)`` of the strongest non-DC FFT bin.

    ``k`` is the bin index in the full spectrum (not the index inside the
    sliced half-spectrum) and ``period`` is ``len(values) / k`` in samples.

    Raises:
        ValueError: if *values* is too short to contain any non-DC bin
            below ``n / 2`` (fewer than 4 samples).
    """
    k, amplitude = amplitude_spectrum(values)
    if amplitude.size == 0:
        raise ValueError('need at least 4 samples to find a dominant component')
    strongest = int(np.argmax(amplitude))
    k_max = int(k[strongest])
    return k_max, float(amplitude[strongest]), len(values) / k_max


def plot_spectrum(ax, values, ylabel, title):
    """Draw the one-sided amplitude spectrum of *values* as a stem plot on *ax*."""
    k, amplitude = amplitude_spectrum(values)
    ax.stem(k, amplitude)
    ax.set_xlim(1, m.floor(len(values) / 2))
    ax.set_xlabel('k')
    ax.set_ylabel(ylabel)
    ax.set_title(title)


def report_dominant(label, values):
    """Print amplitude (in millions), bin index and period of the dominant bin."""
    k_max, amplitude, period = dominant_component(values)
    print(f'{label} - FFT max value: ', round(amplitude / 1000000, 2))
    print(f'{label} - k of FFT max value: ', k_max)
    # The period follows from the detected bin rather than a hard-coded k.
    print(f'{label} - period of k corresponding to FFT max value: ', period)


def main():
    """Run the full analysis: load, print, plot spectra, report, plot series."""
    # Imported here so the numeric helpers above remain importable on
    # machines that have no plotting backend installed.
    import matplotlib.pyplot as plt

    dataset = load_dataset()
    print(dataset)

    packets = fill_zeros_with_median(dataset[PACKETS_COLUMN].tolist())
    unique_ips = fill_zeros_with_median(dataset[UNIQUE_IPS_COLUMN].tolist())

    # Amplitude spectra of both series, stacked vertically.
    fig, (ax_packets, ax_ips) = plt.subplots(2, 1, figsize=(20, 10))
    plot_spectrum(
        ax_packets,
        packets,
        ylabel='Amplitude [millions of pkts]',
        title='Amp. Spectrum for #pkts',
    )
    report_dominant('TCP #pkts/hour', packets)
    plot_spectrum(
        ax_ips,
        unique_ips,
        ylabel='Amplitude [millions of unique source IPs]',
        title='Amp. Spectrum for #unique source IPs',
    )
    # tight_layout only has an effect once the axes exist.
    fig.tight_layout(h_pad=0.5)
    plt.show()
    report_dominant('TCP #uIPs/hour', unique_ips)

    # Plot TCP #pkts/hour and TCP #uIPs/hour timeseries
    fig, (ax_packets, ax_ips) = plt.subplots(2, 1, figsize=(20, 10))
    dataset[PACKETS_COLUMN].plot(
        ax=ax_packets,
        xlabel='',
        ylabel='Number of TCP Packets',
        title='Number of TCP Packets over Time',
    )
    dataset[UNIQUE_IPS_COLUMN].plot(
        ax=ax_ips,
        xlabel='Time',
        ylabel='Number of Unique TCP Source IPs',
        title='Number of Unique TCP Source IPs over Time',
    )
    fig.tight_layout(h_pad=2)
    plt.show()


if __name__ == '__main__':
    main()