Split exercise3 into multiple files
parent c4dc23a9f8
commit aac0e096f7
.gitignore (vendored)
@@ -1,3 +1,5 @@
 data
 venv
 .idea
+*.ipynb
+.ipynb_checkpoints
@@ -1,43 +0,0 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-import matplotlib.dates as mdate
-from datetime import datetime
-from statistics import median
-
-'''
-IMPORTANT: Remove leading space from column '# Bytes' in csv file or pandas
-won't read it correctly.
-'''
-
-dataset = pd.read_csv('./global_last10years.csv', index_col=0).fillna(0)
-
-# Convert unix timestamps to dates
-timestamps = mdate.epoch2num(dataset.index)
-date_formatter = mdate.DateFormatter('%y-%m-%d %H:%M:%S')
-
-# List of packet counts per hour (daily avg)
-ts_packets = dataset.iloc[:,1]
-
-# List of bytes per hour (daily avg)
-ts_bytes = dataset.iloc[:,0]
-
-# List of unique source IPs per hour (daily avg)
-ts_uIPs = dataset.iloc[:,2]
-
-# List of unique destination IPs per hour (daily avg)
-ts_uIPd = dataset.iloc[:,3]
-
-### rep-14: Signals correlation ###
-
-print('Correlations between different columns (NaNs replace with 0):')
-print(dataset.corr(method='pearson').round(decimals=2))
-
-### rep-15: Destinations/Sources ratio ###
-
-ratio_uIPd_uIPs = median(ts_uIPd) / median(ts_uIPs)
-print('Ratio between median IP destinations and median IP sources: ', ratio_uIPd_uIPs)
-
-### rep-16: Peak in unique sources ###
-
-date = datetime.fromtimestamp(ts_uIPs.idxmax())
-print('Peak in unique sources: ', date)
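Note: mdate.epoch2num, used above to convert the unix-timestamp index, has been deprecated and eventually removed in recent matplotlib releases. If the plotting code is ever revived, pd.to_datetime gives values matplotlib can plot directly. A minimal sketch, assuming the index holds unix timestamps in seconds as in the script above:

    import pandas as pd

    # Same CSV layout as the deleted script: unix-second timestamps as the index.
    dataset = pd.read_csv('./global_last10years.csv', index_col=0).fillna(0)

    # pd.to_datetime covers the epoch conversion mdate.epoch2num used to do;
    # matplotlib accepts the resulting datetime64 values without extra helpers.
    dates = pd.to_datetime(dataset.index, unit='s')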
File diff suppressed because it is too large
ex3/rep-14.py (new file)
@@ -0,0 +1,12 @@
+import pandas as pd
+
+'''
+IMPORTANT: Remove leading space from column '# Bytes' in the csv file or pandas
+won't read it correctly.
+'''
+
+# Read dataset and fill missing rows with zeroes
+dataset = pd.read_csv('data/global_last10years.csv', index_col=0).fillna(0)
+
+print('Correlations between different columns (NaNs replaced with 0):')
+print(dataset.corr(method='pearson').round(decimals=2))
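Note: rather than hand-editing the csv as the docstring warns, pandas can absorb the stray space at parse time with skipinitialspace, assuming the space sits directly after the delimiter. A minimal sketch:

    import pandas as pd

    # skipinitialspace strips whitespace right after each delimiter, so a header
    # written as ", # Bytes" still parses to the column name '# Bytes'.
    dataset = pd.read_csv('data/global_last10years.csv', index_col=0,
                          skipinitialspace=True).fillna(0)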
ex3/rep-15.py (new file)
@@ -0,0 +1,14 @@
+import pandas as pd
+from statistics import median
+
+# Read dataset and fill missing rows with zeroes
+dataset = pd.read_csv('data/global_last10years.csv', index_col=0).fillna(0)
+
+# List of unique source IPs per hour (daily avg)
+ts_uIPs = dataset.iloc[:,2]
+
+# List of unique destination IPs per hour (daily avg)
+ts_uIPd = dataset.iloc[:,3]
+
+ratio_uIPd_uIPs = median(ts_uIPd) / median(ts_uIPs)
+print('Ratio between median IP destinations and median IP sources: ', round(ratio_uIPd_uIPs, 2))
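Note: statistics.median accepts the pandas Series here, but the same ratio can be computed with the Series' own median method. An equivalent sketch, assuming the column order used above:

    import pandas as pd

    # Same read as rep-15.py; Series.median() replaces statistics.median.
    dataset = pd.read_csv('data/global_last10years.csv', index_col=0).fillna(0)
    ratio = dataset.iloc[:, 3].median() / dataset.iloc[:, 2].median()
    print('Ratio between median IP destinations and median IP sources: ', round(ratio, 2))

Either way, note that the zeros introduced by fillna(0) count as observations and can pull both medians down.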
ex3/rep-16.py (new file)
@@ -0,0 +1,12 @@
+import pandas as pd
+import matplotlib.dates as mdate
+from datetime import datetime
+
+# Read dataset and fill missing rows with zeroes
+dataset = pd.read_csv('data/global_last10years.csv', index_col=0).fillna(0)
+
+# List of unique source IPs per hour (daily avg)
+ts_uIPs = dataset.iloc[:,2]
+
+date = datetime.fromtimestamp(ts_uIPs.idxmax())
+print('Peak in unique sources: ', date)
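Note: datetime.fromtimestamp interprets the epoch in the machine's local timezone, so the printed peak shifts with wherever the script runs; the mdate import carried over from the original script is also unused here. A UTC-pinned variant, if the report needs a stable timestamp:

    import pandas as pd
    from datetime import datetime, timezone

    dataset = pd.read_csv('data/global_last10years.csv', index_col=0).fillna(0)
    ts_uIPs = dataset.iloc[:, 2]

    # tz=timezone.utc pins the conversion; bare fromtimestamp() uses local time.
    date = datetime.fromtimestamp(ts_uIPs.idxmax(), tz=timezone.utc)
    print('Peak in unique sources (UTC): ', date)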