28 lines
740 B
Python
28 lines
740 B
Python
import pandas as pd
|
|
import pickle
|
|
|
|
|
|
# Preprocessing data - encode ip addresses to numerical values
|
|
def ip_to_bin(ip):
    """Encode a dotted-decimal IPv4 address string as an integer.

    The four octets are packed most-significant first, so the first octet
    ends up in bits 24-31 and the last octet in bits 0-7, e.g.
    ``'192.168.1.1'`` -> ``3232235777``.

    Args:
        ip: IPv4 address in dotted-decimal notation, e.g. ``'10.0.0.1'``.

    Returns:
        The address as an integer in the range ``0`` to ``2**32 - 1``.

    Raises:
        ValueError: If ``ip`` does not consist of exactly four decimal
            octets, each in the range 0-255.
    """
    parts = ip.split('.')
    # Reject short or over-long input up front instead of failing with an
    # IndexError or silently ignoring trailing components.
    if len(parts) != 4:
        raise ValueError('invalid IPv4 address: {!r}'.format(ip))

    value = 0
    for part in parts:
        octet = int(part)  # raises ValueError on non-numeric text
        # An out-of-range octet would spill into the neighbouring byte and
        # make distinct addresses collide, so refuse it.
        if not 0 <= octet <= 255:
            raise ValueError('invalid IPv4 address: {!r}'.format(ip))
        value = (value << 8) | octet
    return value
|
|
|
|
|
|
# 1. Import data
# The two address columns are converted from dotted-decimal strings to
# integers while the CSV is parsed. ip_to_bin is passed directly as the
# converter; wrapping it in a lambda adds nothing.
data = pd.read_csv(
    'input.csv',
    converters={
        'sourceIPAddress': ip_to_bin,
        'destinationIPAddress': ip_to_bin,
    },
)
# Array view of every column read from the file; this is what gets passed
# to the model's predict() call below.
x = data.to_numpy()
|
|
|
|
# 2. Load the trained model and predict
# NOTE(security): pickle.load can execute arbitrary code during
# deserialization, so the model file must come from a trusted source.
# The `with` block guarantees the file handle is closed after loading.
with open('network_traffic_classifier.sav', 'rb') as model_file:
    model = pickle.load(model_file)
y_pred = model.predict(x)

# Attach the predictions as a new column. `x` was extracted before this
# assignment, so the label column is not part of the model input.
data['label'] = y_pred
print(data)
|
|
|
|
# 3. Save output file
# Write the labelled DataFrame as CSV text so the content matches the
# '.csv' file name. index=False omits the auto-generated row index, which
# is not a column of the input file.
data.to_csv('output.csv', index=False)
|