Add random forest classifier

2021-06-05 15:16:36 +02:00 · 2021-06-05 15:16:36 +02:00 · ddc9ce5c20
commit ddc9ce5c20
parent cbcf115a0e
1 changed files with 50 additions and 0 deletions
--- a/competition/random_forest.py
+++ b/competition/random_forest.py
@ -0,0 +1,50 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sn
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import classification_report, confusion_matrix
 from sklearn.model_selection import train_test_split
 def ip_to_bin(x):
     """Return the integer value of a dotted-quad IPv4 address string.

     The string is split on '.', and the first four fields are folded
     together most-significant first, eight bits per field. Fields beyond
     the fourth are ignored and field values are not range-checked.

     Raises:
         IndexError: if the string has fewer than four fields.
         ValueError: if one of the first four fields is not an integer.
     """
     octets = x.split('.')
     value = 0
     # Shift the running value up one byte before adding each new field.
     for position in range(4):
         value = (value << 8) + int(octets[position])
     return value
 # Load the labelled flows. Both IP address columns are converted from
 # dotted-quad strings to integers while the CSV is parsed.
 df = pd.read_csv('training4tuplabeled.csv',
                  converters={
                      'sourceIPAddress': ip_to_bin,
                      'destinationIPAddress': ip_to_bin,
                  })
 # Use `columns=` rather than a positional axis argument: pandas deprecated
 # the positional form in 1.3 and rejects it since 2.0.
 df.drop(columns=['flowStartMilliseconds'], inplace=True)
 # Every remaining column except the 'sublabel' target is used as a feature.
 X = np.array(df.drop(columns=['sublabel']))
 y = np.array(df['sublabel'])
 # Hold out 20% of the rows for validation. No random_state is passed, so
 # the split differs from run to run.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)
 # Build the forest (50 gini trees, balanced class weights, fixed seed,
 # all available cores) and fit it on the training split in one step;
 # fit() returns the estimator itself.
 clf = RandomForestClassifier(
     n_estimators=50,
     criterion='gini',
     class_weight="balanced",
     random_state=0,
     n_jobs=-1,
 ).fit(X_train, y_train)

 # Mean accuracy on the held-out split.
 accuracy = clf.score(X_test, y_test)
 print('Accuracy: ', accuracy)

 # Predictions for both splits, consumed by the reports that follow.
 y_pred_test = clf.predict(X_test)
 y_pred_train = clf.predict(X_train)
 def _show_report(title, y_true, y_pred):
     """Print a banner, plot the confusion matrix and print per-class metrics.

     Args:
         title: Banner text printed before the report.
         y_true: Ground-truth labels.
         y_pred: Labels predicted for the same samples.

     Returns:
         The matrix returned by ``confusion_matrix(y_true, y_pred)``.
     """
     print(title)
     matrix = confusion_matrix(y_true, y_pred)
     plt.figure(figsize=(10, 7))
     # fmt='d' prints the integer counts in full; the default '.2g'
     # abbreviates large counts to two significant digits.
     sn.heatmap(matrix, annot=True, fmt='d')
     # confusion_matrix puts true labels on rows and predictions on columns,
     # and heatmap draws rows along the y axis and columns along the x axis.
     plt.xlabel('Predicted')
     plt.ylabel('Truth')
     plt.show()
     print(classification_report(y_true, y_pred))
     return matrix


 cm_train = _show_report("\n *************** TRAINING ****************", y_train, y_pred_train)
 cm_test = _show_report("\n ************** VALIDATION ***************", y_test, y_pred_test)
 # Hand-built sample row: two encoded IPv4 addresses followed by three
 # literal values. Presumably laid out in the feature column order of X;
 # verify against the CSV header.
 example_measure = np.array(
     [ip_to_bin(address) for address in ('2.1.1.1', '2.1.1.2')] + [0, 0, 1]
 )