import pickle

import pandas as pd
from sklearn.preprocessing import LabelEncoder


# Source: https://stackoverflow.com/questions/24458645/label-encoding-across-multiple-columns-in-scikit-learn
class MultiColumnLabelEncoder:
    """Label-encode several DataFrame columns, each with its own LabelEncoder.

    A fresh ``LabelEncoder`` is fitted on every call to :meth:`transform`, so
    no mapping is stored on the instance between calls.
    """

    def __init__(self, columns=None):
        """Store the column names to encode.

        Args:
            columns: Iterable of column names to encode, or ``None`` to
                encode every column of the frame passed to ``transform``.
        """
        self.columns = columns  # array of column names to encode

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the encoders are fitted in ``transform``."""
        return self  # not relevant here

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns label-encoded.

        Columns listed in ``self.columns`` are replaced by the output of
        ``LabelEncoder().fit_transform``. If ``self.columns`` is ``None``,
        every column of ``X`` is encoded. ``X`` itself is not modified.

        Args:
            X: DataFrame to encode.

        Returns:
            A new DataFrame with the encoded columns.
        """
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            # DataFrame.iteritems() was removed in pandas 2.0; items() is the
            # equivalent that is available in both old and new versions.
            for colname, col in output.items():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self, X, y=None):
        """Call ``fit`` and then ``transform`` on ``X`` and return the result."""
        return self.fit(X, y).transform(X)


# 1. Import data
# The file is read once: transform() works on a copy, so `data_end` keeps the
# raw values while `data` holds the encoded version.
data_end = pd.read_csv('input.csv')
# NOTE(review): the encoders are fitted on this input, so the integer codes
# only match the ones seen during training if the same set of addresses
# occurs in both -- confirm against the training pipeline.
data = MultiColumnLabelEncoder(
    columns=['sourceIPAddress', 'destinationIPAddress']
).fit_transform(data_end)
x = data.to_numpy()

# 2. Loading a trained model and predict
# NOTE(review): pickle.load can execute arbitrary code; only load a model file
# that comes from a trusted source.
with open('network_traffic_classifier.sav', 'rb') as model_file:
    model = pickle.load(model_file)
y_pred = model.predict(x)

# 3. Add predictions and save output file
data_end['sublabel'] = y_pred
data_end.to_csv('output.csv', index=False)