Convert numerical IPs back to addresses

This commit is contained in:
Tobias Eidelpes 2021-06-05 18:44:20 +02:00
parent bd9d3b6932
commit 7f8a6a76e8

View File

@ -8,19 +8,37 @@ def ip_to_bin(ip):
return (int(parts[0]) << 24) + (int(parts[1]) << 16) + (int(parts[2]) << 8) + int(parts[3]) return (int(parts[0]) << 24) + (int(parts[1]) << 16) + (int(parts[2]) << 8) + int(parts[3])
# Postprocessing data - decode numerical values to ip addresses
def bin_to_ip(ipnum):
    """Convert a numeric IPv4 address back to dotted-quad notation.

    The value is split into four bytes, most significant first, mirroring
    the shift-based packing used when the address was encoded.

    Args:
        ipnum: Integer-like value (Python int, NumPy integer or whole-number
            float) holding the address. Only the low 32 bits are used.

    Returns:
        The address as a string such as ``'192.168.1.1'``.
    """
    # Normalise to an exact Python int first; shifting and masking then stays
    # in integer arithmetic instead of going through float division.
    value = int(ipnum)
    octets = [(value >> shift) & 0xFF for shift in (24, 16, 8, 0)]
    return '.'.join(map(str, octets))
# 1. Import data # 1. Import data
data = pd.read_csv('input.csv', data = pd.read_csv('input.csv',
converters={ converters={
'sourceIPAddress': lambda x1: ip_to_bin(x1), 'sourceIPAddress': lambda x1: ip_to_bin(x1),
'destinationIPAddress': lambda x2: ip_to_bin(x2) 'destinationIPAddress': lambda x2: ip_to_bin(x2)
}) })
# Take the flowStartMilliseconds column out of the frame and keep it for later
# re-insertion (presumably so it is not part of the model input - confirm).
# DataFrame.pop removes and returns the column in one step, which avoids
# passing ``axis`` positionally to drop(); newer pandas releases reject that.
flowStartMillisecondsColumn = data.pop('flowStartMilliseconds')
x = data.to_numpy() x = data.to_numpy()
# 2. Loading a trained model and predict # 2. Loading a trained model and predict
model = pickle.load(open('network_traffic_classifier.sav', 'rb')) model = pickle.load(open('network_traffic_classifier.sav', 'rb'))
y_pred = model.predict(x) y_pred = model.predict(x)
data['label'] = y_pred data['sublabel'] = y_pred
# Put the saved flowStartMilliseconds values back as the first column
data.insert(0, 'flowStartMilliseconds', flowStartMillisecondsColumn)
# Turn the numeric source/destination addresses back into dotted-quad strings
data['sourceIPAddress'] = data['sourceIPAddress'].map(bin_to_ip)
data['destinationIPAddress'] = data['destinationIPAddress'].map(bin_to_ip)
print(data) print(data)
# 3. Save output file # 3. Save output file