"""Train a Gaussian Naive Bayes classifier on ``iris_base.csv`` and save it.

The script reads the CSV, separates the ``label`` column from the feature
columns, makes a stratified 80/20 train/test split, fits a ``GaussianNB``
model, prints a confusion matrix and classification report for both splits,
and pickles the fitted model to ``iris_classif_model.sav``.
"""

import pickle

import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# 1. Load the training data from CSV into a pandas DataFrame.
data = pd.read_csv("iris_base.csv")

# 2. Separate the labels from the features: ``y`` holds the "label" column,
#    ``data`` keeps every remaining column.
y = data["label"]
data = data.drop(columns=["label"])

# 3. Split into training and test subsets (20% held out for validation).
#    ``stratify=y`` asks for the split to be stratified on the labels.
#    NOTE: no ``random_state`` is passed, so the split is not fixed across runs.
x_train, x_test, y_train, y_test = train_test_split(
    data, y, test_size=0.2, stratify=y
)

# 4. Fit a Gaussian Naive Bayes classifier on the training split.
gnb = GaussianNB()
gnb.fit(x_train, y_train)

# 5. Evaluate on both splits: comparing the training and validation reports
#    helps spot underfitting or overfitting.
y_pred_train = gnb.predict(x_train)
y_pred_test = gnb.predict(x_test)

print("\n *************** TRAINING ****************")
print("\n Confusion matrix:")
print(confusion_matrix(y_train, y_pred_train))
print(classification_report(y_train, y_pred_train))

print("\n ************** VALIDATION ***************")
print("\n Confusion matrix:")
print(confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))

# 6. Save the fitted model. The context manager guarantees the file is
#    flushed and closed even if pickling raises.
with open('iris_classif_model.sav', 'wb') as model_file:
    pickle.dump(gnb, model_file)