netsec-lab/competition/classifier.py
2021-06-05 17:26:02 +02:00

28 lines
740 B
Python

import pandas as pd
import pickle
# Preprocessing data - encode ip addresses to numerical values
def ip_to_bin(ip):
    """Encode a dotted-decimal IPv4 address as a single integer.

    The four octets are packed most-significant first, so ``'1.2.3.4'``
    becomes ``(1 << 24) + (2 << 16) + (3 << 8) + 4``.

    Args:
        ip: IPv4 address in dotted-decimal notation, e.g. ``'192.168.1.1'``.

    Returns:
        The address as an integer in the range ``0`` to ``2**32 - 1``.

    Raises:
        ValueError: If ``ip`` does not consist of exactly four integer
            octets, each in the range 0-255.
    """
    parts = ip.split('.')
    if len(parts) != 4:
        raise ValueError(f"expected 4 octets in IPv4 address, got {ip!r}")

    value = 0
    for part in parts:
        octet = int(part)  # raises ValueError for non-numeric text
        # Reject out-of-range octets: they would otherwise spill into the
        # neighbouring byte and make distinct addresses collide.
        if not 0 <= octet <= 255:
            raise ValueError(f"octet {octet} out of range 0-255 in {ip!r}")
        value = (value << 8) | octet
    return value
# 1. Import data
# The two IP address columns are converted to integers while the CSV is
# parsed, so the model receives them as numbers rather than strings.
data = pd.read_csv(
    'input.csv',
    converters={
        'sourceIPAddress': ip_to_bin,
        'destinationIPAddress': ip_to_bin,
    },
)
x = data.to_numpy()

# 2. Load the trained model and predict
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only load model files that come from a trusted source.
with open('network_traffic_classifier.sav', 'rb') as model_file:
    model = pickle.load(model_file)
y_pred = model.predict(x)
data['label'] = y_pred
print(data)

# 3. Save output file
# Write the labelled rows as real CSV (the previous code tried to pickle the
# pandas module itself). index=False keeps the automatic row index out of
# the file, so its columns are those of input.csv plus 'label'.
data.to_csv('output.csv', index=False)