From 7f8a6a76e8ab4a0bb13f5e24050f1c77cc9d3b1e Mon Sep 17 00:00:00 2001 From: Tobias Eidelpes Date: Sat, 5 Jun 2021 18:44:20 +0200 Subject: [PATCH] Convert numerical IPs back to addresses --- competition/classifier.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/competition/classifier.py b/competition/classifier.py index 0e0878a..11e4838 100644 --- a/competition/classifier.py +++ b/competition/classifier.py @@ -8,19 +8,37 @@ def ip_to_bin(ip): return (int(parts[0]) << 24) + (int(parts[1]) << 16) + (int(parts[2]) << 8) + int(parts[3]) +# Postprocessing data - decode a 32-bit integer back into a dotted-quad IPv4 string (inverse of ip_to_bin) +def bin_to_ip(ipnum): + o1 = (int(ipnum) >> 24) & 0xFF + o2 = (int(ipnum) >> 16) & 0xFF + o3 = (int(ipnum) >> 8) & 0xFF + o4 = int(ipnum) & 0xFF + return '%d.%d.%d.%d' % (o1, o2, o3, o4) + + # 1. Import data data = pd.read_csv('input.csv', converters={ 'sourceIPAddress': lambda x1: ip_to_bin(x1), 'destinationIPAddress': lambda x2: ip_to_bin(x2) }) +# Save flowStartMilliseconds column for later insertion +flowStartMillisecondsColumn = data['flowStartMilliseconds'] +# Remove flowStartMilliseconds column (axis must be passed by keyword; positional axis was removed in pandas 2.0) +data.drop(['flowStartMilliseconds'], axis=1, inplace=True) x = data.to_numpy() # 2. Loading a trained model and predict model = pickle.load(open('network_traffic_classifier.sav', 'rb')) y_pred = model.predict(x) -data['label'] = y_pred +data['sublabel'] = y_pred +# Insert flowStartMilliseconds column again +data.insert(loc=0, column='flowStartMilliseconds', value=flowStartMillisecondsColumn) +# Convert numerical IPs back to proper IP addresses +data['sourceIPAddress'] = data['sourceIPAddress'].apply(bin_to_ip) +data['destinationIPAddress'] = data['destinationIPAddress'].apply(bin_to_ip) print(data) # 3. Save output file