# 1. Importing CSV data for training in pandas dataframes
import pandas as pd

data = pd.read_csv("iris_base.csv")
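# Assumption: "iris_base.csv" contains the four numeric Iris measurements
# (sepal/petal length and width) plus a "label" column with the species name,
# which is used as the classification target below.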
# 2. Separating labels from data
y = data["label"]
data = data.drop(columns=["label"])
x = data.to_numpy()
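# y now holds the class labels and x the remaining feature columns as a
# NumPy array of shape (n_samples, n_features).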
# 3. Splitting data into training/test subsets for model training and validation
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y)
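# test_size=0.2 reserves 20% of the samples for validation, and stratify=y keeps
# the class proportions the same in both splits. Note: without a fixed
# random_state the split (and therefore the reported scores) changes on every run.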
# 4. Fitting a Gaussian Naive Bayes classifier with the training split
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(x_train, y_train)
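# GaussianNB models each feature as a per-class Gaussian: fit() estimates the
# mean and variance of every feature for every class, together with the class
# priors, from the training split.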
# 5. The obtained model is evaluated on both the training and the test split
# to check for underfitting and overfitting
y_pred_train = gnb.predict(x_train)
y_pred_test = gnb.predict(x_test)

from sklearn.metrics import classification_report, confusion_matrix

print("\n *************** TRAINING ****************")
print("\n Confusion matrix:")
print(confusion_matrix(y_train, y_pred_train))
print(classification_report(y_train, y_pred_train))

print("\n ************** VALIDATION ***************")
print("\n Confusion matrix:")
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
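# Comparable precision/recall/F1 scores on the TRAINING and VALIDATION reports
# indicate neither underfitting nor overfitting; a large gap between the two
# would point to overfitting, while poor scores on both would suggest underfitting.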
# 6. Saving the obtained model
import pickle

with open('iris_classif_model.sav', 'wb') as f:
    pickle.dump(gnb, f)
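# Usage sketch (assumption: this runs in the same session, so x_test is still
# available; "loaded_gnb" is an illustrative name): reload the pickled
# classifier and confirm it still predicts on a held-out sample.
with open('iris_classif_model.sav', 'rb') as f:
    loaded_gnb = pickle.load(f)
print("\n Reloaded model prediction for the first test sample:", loaded_gnb.predict(x_test[:1]))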