Add preliminary thesis structure

parent 9f4143be50
commit 32adb47b78

File diff suppressed because one or more lines are too long; the hunks below are shown in unified form.

@@ -33,7 +33,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": 2,
 "id": "cfd472e0",
 "metadata": {},
 "outputs": [],
@@ -99727,16 +99727,48 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 4,
 "id": "9e57cd86",
 "metadata": {},
-"outputs": [],
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Downloading labels from Label Studio...\n",
+"Download complete\n",
+"Loading labels for field 'ground_truth'...\n",
+" 100% |█████████████████| 639/639 [1.1s elapsed, 0s remaining, 576.6 samples/s] \n"
+]
+},
+{
+"data": {
+"text/html": [
+"\n",
+" <iframe\n",
+" width=\"100%\"\n",
+" height=\"800\"\n",
+" src=\"http://localhost:5151/?context=ipython&subscription=f8354b3b-60f0-418a-a49d-f664312d58cc\"\n",
+" frameborder=\"0\"\n",
+" allowfullscreen\n",
+" \n",
+" ></iframe>\n",
+" "
+],
+"text/plain": [
+"<IPython.lib.display.IFrame at 0x7fe94ac02bd0>"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
 "source": [
 "anno_key = \"labelstudio_basic_recipe\"\n",
 "\n",
 "# Merge annotations back into FiftyOne dataset\n",
 "dataset = fo.load_dataset(\"dataset\")\n",
-"dataset.load_annotations(anno_key)\n",
+"dataset.load_annotations(anno_key, url=LABEL_STUDIO_URL, api_key=API_KEY)\n",
 "\n",
 "# Load the view that was annotated in the App\n",
 "view = dataset.load_annotation_view(anno_key)\n",
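
De-escaped, the updated cell source corresponds to the following Python. `LABEL_STUDIO_URL` and `API_KEY` are defined in an earlier cell outside this diff, as is the `fiftyone` import; both are assumed here.

```python
import fiftyone as fo  # assumed to be imported in an earlier cell

anno_key = "labelstudio_basic_recipe"

# Merge annotations back into FiftyOne dataset
dataset = fo.load_dataset("dataset")
dataset.load_annotations(anno_key, url=LABEL_STUDIO_URL, api_key=API_KEY)

# Load the view that was annotated in the App
view = dataset.load_annotation_view(anno_key)
```
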
@@ -99745,12 +99777,45 @@
 "# Step 6: Cleanup\n",
 "\n",
 "# Delete tasks from Label Studio\n",
-"results = dataset.load_annotation_results(anno_key)\n",
-"results.cleanup()\n",
+"#results = dataset.load_annotation_results(anno_key)\n",
+"#results.cleanup()\n",
+"\n",
+"# Delete run record (not the labels) from FiftyOne\n",
+"#dataset.delete_annotation_run(anno_key)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 5,
+"id": "65f64f8b",
+"metadata": {},
+"outputs": [],
+"source": [
+"# Delete tasks from Label Studio\n",
+"#results = dataset.load_annotation_results(anno_key)\n",
+"#results.cleanup()\n",
 "\n",
 "# Delete run record (not the labels) from FiftyOne\n",
 "dataset.delete_annotation_run(anno_key)"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": 6,
+"id": "ef4fd54f",
+"metadata": {},
+"outputs": [],
+"source": [
+"dataset.save()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "b099682d",
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {
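
De-escaped, the cleanup cells read roughly as follows after this change: the first cell keeps all cleanup commented out, the second still deletes the run record, and the third persists the dataset.

```python
# Cell 1: cleanup against Label Studio, now fully disabled
#results = dataset.load_annotation_results(anno_key)
#results.cleanup()
#dataset.delete_annotation_run(anno_key)

# Cell 2: Label Studio cleanup kept disabled, but the FiftyOne run
# record (not the labels) is removed
#results = dataset.load_annotation_results(anno_key)
#results.cleanup()
dataset.delete_annotation_run(anno_key)

# Cell 3: persist the merged annotations
dataset.save()
```
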
BIN  thesis/graphics/APmodel-model-optimized-relabeled.pdf  (new file; binary not shown)
BIN  thesis/graphics/APmodel-model-original-relabeled.pdf  (new file; binary not shown)
BIN  thesis/graphics/APmodel-relabeled.pdf  (new file; binary not shown)
BIN  thesis/graphics/APmodel-yolo-original-resnet-final-relabeled.pdf  (new file; binary not shown)
BIN  thesis/graphics/CMmodel-relabeled.pdf  (new file; binary not shown)

@@ -47,6 +47,8 @@
 \nonzeroparskip % Create space between paragraphs (optional).
 \setlength{\parindent}{0pt} % Remove paragraph indentation (optional).
 
+\setcounter{tocdepth}{3}
+
 \makeindex % Use an optional index.
 \makeglossaries % Use an optional glossary.
 %\glstocfalse % Remove the glossaries from the table of contents.
@@ -117,18 +119,45 @@
 % Switch to arabic numbering and start the enumeration of chapters in the table of contents.
 \mainmatter
 
-% \chapter{Introduction}
-% \todo{Enter your text here.}
+\chapter{Introduction}
+\label{chap:introduction}
 
-\chapter{Evaluation}
+\section{Motivation and Problem Statement}
+\label{sec:motivation}
+
+\section{Thesis Structure}
+\label{sec:structure}
+
+\chapter{Theoretical Background}
+\label{chap:background}
+
+\section{Object Detection}
+\label{sec:background-detection}
+
+\section{Classification}
+\label{sec:background-classification}
+
+\section{Related Work}
+\label{sec:related-work}
+
+\chapter{Prototype Development}
+\label{chap:development}
+
+\section{Object Detection}
+\label{sec:development-detection}
+
+\section{Classification}
+\label{sec:development-classification}
+
+\chapter{Results}
+\label{chap:results}
 
 The following sections contain a detailed evaluation of the model in
 various scenarios. First, we present metrics from the training phases
 of the constituent models. Second, we employ methods from the field of
 \gls{xai} such as \gls{grad-cam} to get a better understanding of the
 models' abstractions. Finally, we turn to the models' aggregate
-performance on the test set and discuss whether the initial goals set
-by the problem description have been met or not.
+performance on the test set.
 
 
 \section{Object Detection}
 \label{sec:yolo-eval}
@@ -149,7 +178,7 @@ consists of 91479 images with a roughly 85/5/10 split for training,
 validation and testing, respectively.
 
 \subsection{Training Phase}
-\label{ssec:yolo-training-phase}
+\label{ssec:yolo-training}
 
 The object detection model was trained for 300 epochs on 79204 images
 with 284130 ground truth labels. The weights from the best-performing
@@ -240,7 +269,7 @@ before overfitting occurs.
 \end{figure}
 
 \subsection{Test Phase}
-\label{ssec:yolo-test-phase}
+\label{ssec:yolo-test}
 
 Of the 91479 images around 10\% were used for the test phase. These
 images contain a total of 12238 ground truth
@@ -337,11 +366,10 @@ Figure~\ref{fig:hyp-opt-fitness} shows the model's fitness during
 training for each epoch. After the highest fitness of 0.6172 at epoch
 27, the performance quickly declines and shows that further training
 would likely not yield improved results. The model converges to its
-highest fitness much earlier than the non-optimized version discussed
-in section~\ref{ssec:yolo-training-phase}, which indicates that the
-adjusted parameters provide a better starting point in general.
-Furthermore, the maximum fitness is 0.74\% higher than in the
-non-optimized version.
-
+highest fitness much earlier than the non-optimized version, which
+indicates that the adjusted parameters provide a better starting point
+in general. Furthermore, the maximum fitness is 0.74\% higher than in
+the non-optimized version.
 
 \begin{figure}
 \centering
@@ -426,7 +454,7 @@ is lower by 1.8\%.
 \end{figure}
 
 \section{Classification}
-\label{sec:resnet-eval}
+\label{sec:classifier-eval}
 
 The classifier receives cutouts from the object detection model and
 determines whether the image shows a stressed plant or not. To achieve
@@ -448,7 +476,7 @@ regarding training and inference time as well as required space. The
 50 layer architecture (\gls{resnet}50) is adequate for our use case.
 
 \subsection{Training Phase}
-\label{ssec:resnet-training-phase}
+\label{ssec:classifier-training}
 
 The dataset was split 85/15 into training and validation sets. The
 images in the training set were augmented with a random crop to arrive
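
A sketch of the random-crop augmentation described in this hunk, assuming torchvision; the 224x224 crop size and the deterministic validation pipeline are assumptions, not stated in the diff.

```python
from torchvision import transforms

# Training images: random crop, rescaled to the network input size.
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224),  # crop size is an assumed value
    transforms.ToTensor(),
])

# Validation images: deterministic resize + center crop for comparability.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])
```
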
@@ -481,15 +509,15 @@ feature extraction capabilities.
 \end{figure}
 
 \subsection{Hyper-parameter Optimization}
-\label{ssec:resnet-hyp-opt}
+\label{ssec:classifier-hyp-opt}
 
 In order to improve the aforementioned accuracy values, we perform
 hyper-parameter optimization across a wide range of
-parameters. Table~\ref{tab:resnet-hyps} lists the hyper-parameters and
-their possible values. Since the number of all combinations of values
-is 11520 and each combination is trained for 10 epochs with a training
-time of approximately six minutes per combination, exhausting the
-search space would take 48 days. Due to time limitations, we have
+parameters. Table~\ref{tab:classifier-hyps} lists the hyper-parameters
+and their possible values. Since the number of all combinations of
+values is 11520 and each combination is trained for 10 epochs with a
+training time of approximately six minutes per combination, exhausting
+the search space would take 48 days. Due to time limitations, we have
 chosen to not search exhaustively but to pick random combinations
 instead. Random search works surprisingly well---especially compared to
 grid search---in a number of domains, one of which is hyper-parameter
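
The random search described in this hunk can be sketched as follows. The parameter names and values are illustrative placeholders rather than the actual eight-parameter grid from the table, and `train_and_evaluate` is a hypothetical helper standing in for one 10-epoch training run.

```python
import random

# Illustrative search space; the real grid has eight parameters and
# 11520 combinations in total.
grid = {
    "optimizer": ["sgd", "adam"],
    "learning_rate": [1e-4, 1e-3, 1e-2],
    "momentum": [0.0, 0.9, 0.99],
    "batch_size": [16, 32, 64],
}

best_f1, best_params = 0.0, None
for _ in range(138):  # 138 random iterations, as in the text
    params = {name: random.choice(values) for name, values in grid.items()}
    f1 = train_and_evaluate(**params)  # hypothetical: one 10-epoch run
    if f1 > best_f1:
        best_f1, best_params = f1, params
```
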
@@ -513,13 +541,13 @@ optimization~\cite{bergstra2012}.
 \end{tabular}
 \caption{Hyper-parameters and their possible values during
   optimization.}
-\label{tab:resnet-hyps}
+\label{tab:classifier-hyps}
 \end{table}
 
 The random search was run for 138 iterations, which equates to a 75\%
 probability that the best solution lies within 1\% of the theoretical
-maximum~\eqref{eq:opt-prob}. Figure~\ref{fig:resnet-hyp-results} shows
-three of the eight parameters and their impact on a high
+maximum~\eqref{eq:opt-prob}. Figure~\ref{fig:classifier-hyp-results}
+shows three of the eight parameters and their impact on a high
 F1-score. \gls{sgd} has less variation in its results than
 Adam~\cite{kingma2017} and manages to provide eight out of the ten
 best results. The number of epochs to train for was chosen based on
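
The 75\% figure follows from the standard random-search argument. A sketch of what \eqref{eq:opt-prob} presumably contains (the equation itself lies outside this diff, so this is an assumption): the probability that at least one of $n$ independent uniform draws lands in the top 1\% of the search space is

```latex
% Probability that at least one of n uniform random draws lands in the
% top 1% of the search space; for n = 138 this gives roughly 0.75.
\begin{equation}
  P(n) = 1 - (1 - 0.01)^{n}, \qquad
  P(138) = 1 - 0.99^{138} \approx 0.75
\end{equation}
```
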
@@ -549,10 +577,10 @@ figure~\ref{fig:classifier-training-metrics}.
   produced the best iteration with an F1-score of 0.9783. Adam tends
   to require more customization of its parameters than \gls{sgd} to
   achieve good results.}
-\label{fig:resnet-hyp-results}
+\label{fig:classifier-hyp-results}
 \end{figure}
 
-Table~\ref{tab:resnet-final-hyps} lists the final hyper-parameters
+Table~\ref{tab:classifier-final-hyps} lists the final hyper-parameters
 which were chosen to train the improved model. In order to confirm
 that the model does not suffer from overfitting or is a product of
 chance due to a coincidentally advantageous train/test split, we
@@ -579,10 +607,10 @@ is robust against variations in the training set.
 \end{tabular}
 \caption[Hyper-parameters for the optimized classifier.]{Chosen
   hyper-parameters for the final, improved model. The difference to
-  the parameters listed in Table~\ref{tab:resnet-hyps} comes as a
-  result of choosing \gls{sgd} over Adam. The missing four
+  the parameters listed in Table~\ref{tab:classifier-hyps} comes as
+  a result of choosing \gls{sgd} over Adam. The missing four
   parameters are only required for Adam and not \gls{sgd}.}
-\label{tab:resnet-final-hyps}
+\label{tab:classifier-final-hyps}
 \end{table}
 
 \begin{figure}
@@ -636,7 +664,7 @@ F1-score of 1 on the training set.
 
 
 \subsection{Class Activation Maps}
-\label{ssec:resnet-cam}
+\label{ssec:classifier-cam}
 
 Neural networks are notorious for their black-box behavior, where it
 is possible to observe the inputs and the corresponding outputs, but
@@ -666,7 +694,7 @@ become progressively worse as we move to earlier convolutional layers
 as they have smaller receptive fields and only focus on less semantic
 local features.''~\cite[p.5]{selvaraju2020}
 
-Turning to our classifier, figure~\ref{fig:resnet-cam} shows the
+Turning to our classifier, figure~\ref{fig:classifier-cam} shows the
 \glspl{cam} for \emph{healthy} and \emph{stressed}. While the regions
 of interest for the \emph{healthy} class lie on the healthy plant, the
 \emph{stressed} plant is barely considered and mostly rendered as
@@ -675,8 +703,8 @@ inputs to the \emph{stressed} classification, the regions of interest
 predominantly stay on the thirsty as opposed to the healthy plant. In
 fact, the large hanging leaves play a significant role in determining
 the class the image belongs to. This is an additional data point
-confirming that the model focuses on the \emph{right} parts of the
-image during classification.
+confirming that the model focuses on the semantically meaningful parts
+of the image during classification.
 
 \begin{figure}
 \centering
@@ -691,7 +719,7 @@ image during classification.
   class. The classifier focuses on the hanging leaves of the thirsty
   plant. The image was classified as \emph{stressed} with a
   confidence of 70\%.}
-\label{fig:resnet-cam}
+\label{fig:classifier-cam}
 \end{figure}
 
 
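
A minimal Grad-CAM sketch matching the description above, assuming a PyTorch ResNet-50 with two output classes (healthy = 0, stressed = 1). The target layer, input shape, and weight loading are assumptions, not the thesis code.

```python
import torch
import torch.nn.functional as F
from torchvision import models

model = models.resnet50(num_classes=2).eval()  # load trained weights here

# Capture the last conv block's activations and their gradients via hooks.
feats, grads = {}, {}
model.layer4.register_forward_hook(
    lambda mod, inp, out: feats.update(a=out))
model.layer4.register_full_backward_hook(
    lambda mod, gin, gout: grads.update(a=gout[0]))

def grad_cam(image: torch.Tensor, target_class: int) -> torch.Tensor:
    """Return an [H, W] heatmap of evidence for target_class (image: 1x3xHxW)."""
    logits = model(image)
    model.zero_grad()
    logits[0, target_class].backward()
    # Weight each feature map by its spatially averaged gradient, then ReLU.
    weights = grads["a"].mean(dim=(2, 3), keepdim=True)  # [1, C, 1, 1]
    cam = F.relu((weights * feats["a"]).sum(dim=1))      # [1, h, w]
    cam = F.interpolate(cam.unsqueeze(0), size=image.shape[2:],
                        mode="bilinear", align_corners=False)[0, 0]
    return cam / (cam.max() + 1e-8)                      # normalize to [0, 1]
```
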
@@ -727,20 +755,23 @@ the labels allowed to include more images in the test set because they
 could be labeled more easily. Additionally, going over the detections
 and classifications provided a comprehensive view on how the models
 work and what their weaknesses and strengths are. After the labels
-have been corrected, the ground truth of the test set contains 662
-bounding boxes of healthy plants and 488 of stressed plants.
+have been corrected, the ground truth of the test set contains 766
+bounding boxes of healthy plants and 494 of stressed plants.
 
+\subsection{Non-optimized Model}
+\label{ssec:model-non-optimized}
+
 \begin{table}
 \centering
 \begin{tabular}{lrrrr}
 \toprule
-{} & Precision & Recall & F1-score & Support \\
+{} & precision & recall & f1-score & support \\
 \midrule
-Healthy & 0.824 & 0.745 & 0.783 & 662.0 \\
-Stressed & 0.707 & 0.783 & 0.743 & 488.0 \\
-micro avg & 0.769 & 0.761 & 0.765 & 1150.0 \\
-macro avg & 0.766 & 0.764 & 0.763 & 1150.0 \\
-weighted avg & 0.775 & 0.761 & 0.766 & 1150.0 \\
+Healthy & 0.665 & 0.554 & 0.604 & 766 \\
+Stressed & 0.639 & 0.502 & 0.562 & 494 \\
+micro avg & 0.655 & 0.533 & 0.588 & 1260 \\
+macro avg & 0.652 & 0.528 & 0.583 & 1260 \\
+weighted avg & 0.655 & 0.533 & 0.588 & 1260 \\
 \bottomrule
 \end{tabular}
 \caption{Precision, recall and F1-score for the aggregate model.}
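
The lowercase column headers match the layout of scikit-learn's classification_report, which FiftyOne's detection evaluation also prints. A sketch of how such a table can be produced, under the assumption that the model's predictions live in a field named "predictions" (the field name is not part of this diff):

```python
import fiftyone as fo

dataset = fo.load_dataset("dataset")

# Match predicted boxes against the corrected ground truth and print a
# classification_report-style table.
results = dataset.evaluate_detections(
    "predictions",            # assumed name of the predictions field
    gt_field="ground_truth",
    eval_key="eval",
)
results.print_report(classes=["Healthy", "Stressed"])
```
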
@@ -748,41 +779,39 @@
 \end{table}
 
 Table~\ref{tab:model-metrics} shows precision, recall and the F1-score
-for both classes \emph{Healthy} and \emph{Stressed}. Both precision
-and recall are balanced and the F1-score is high. Unfortunately, these
-values do not take the accuracy of bounding boxes into account and
-thus have only limited expressive power.
+for both classes \emph{Healthy} and \emph{Stressed}. Precision is
+higher than recall for both classes and the F1-score is at
+0.59. Unfortunately, these values do not take the accuracy of bounding
+boxes into account and thus have only limited expressive power.
 
 Figure~\ref{fig:aggregate-ap} shows the precision and recall curves
 for both classes at different \gls{iou} thresholds. The left plot
 shows the \gls{ap} for each class at the threshold of 0.5 and the
-right one at 0.95. The \gls{map} is 0.6226 and calculated across all
+right one at 0.95. The \gls{map} is 0.3581 and calculated across all
 classes as the mean \gls{ap} over the \gls{iou} thresholds from 0.5 to 0.95 in
-0.05 steps. The difference between \gls{map}@0.5 and \gls{map}@0.95 is
-fairly small which indicates that the bounding boxes encapsulate the
-objects of interest well. The cliffs at around 0.77 (left) and 0.7
-(right) happen at a detection threshold of 0.5. The classifier's last
-layer is a softmax layer which necessarily transforms the input into a
-probability of showing either a healthy or stressed plant. If the
-probability of an image showing a healthy plant is below 0.5, it is no
-longer classified as healthy but as stressed. The threshold for
-discriminating the two classes lies at the 0.5 value and is therefore
-the cutoff for either class.
+0.05 steps. The cliffs at around 0.6 (left) and 0.3 (right) happen at
+a detection threshold of 0.5. The classifier's last layer is a softmax
+layer which necessarily transforms the input into a probability of
+showing either a healthy or stressed plant. If the probability of an
+image showing a healthy plant is below 0.5, it is no longer classified
+as healthy but as stressed. The threshold for discriminating the two
+classes lies at the 0.5 value and is therefore the cutoff for either
+class.
 
 \begin{figure}
 \centering
-\includegraphics{graphics/APmodel.pdf}
+\includegraphics{graphics/APmodel-model-optimized-relabeled.pdf}
 \caption[Aggregate model AP@0.5 and AP@0.95.]{Precision-recall
   curves for \gls{iou} thresholds of 0.5 and 0.95. The \gls{ap} of a
   specific threshold is defined as the area under the
   precision-recall curve of that threshold. The \gls{map} across
   \gls{iou} thresholds from 0.5 to 0.95 in 0.05 steps
-  \textsf{mAP}@0.5:0.95 is 0.6226.}
+  \textsf{mAP}@0.5:0.95 is 0.3581.}
 \label{fig:aggregate-ap}
 \end{figure}
 
-\subsection{Hyper-parameter Optimization}
-\label{ssec:model-hyp-opt}
+\subsection{Optimized Model}
+\label{ssec:model-optimized}
 
 So far the metrics shown in table~\ref{tab:model-metrics} are obtained
 with the non-optimized versions of both the object detection and
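
The quoted mAP@0.5:0.95 values are the mean of the per-threshold APs at IoU thresholds 0.5, 0.55, ..., 0.95, not a property of the thresholds themselves. With FiftyOne's COCO-style evaluation this is one flag away from the report above; the field names remain assumptions.

```python
import fiftyone as fo

dataset = fo.load_dataset("dataset")
results = dataset.evaluate_detections(
    "predictions",            # assumed predictions field, as above
    gt_field="ground_truth",
    compute_mAP=True,         # evaluate at IoUs 0.5:0.05:0.95, COCO style
)
print(results.mAP())          # mean AP over classes and IoU thresholds
```
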
@@ -790,7 +819,7 @@ classification model. Hyper-parameter optimization of the classifier
 led to significant model improvements, while the object detector has
 improved precision but lower recall and slightly lower \gls{map}
 values. To evaluate the final aggregate model which consists of the
-individual optimized models, we run the same test as in
+individual optimized models, we run the same test described in
 section~\ref{sec:aggregate-model}.
 
 \begin{table}
@@ -799,11 +828,11 @@ section~\ref{sec:aggregate-model}.
 \toprule
 {} & precision & recall & f1-score & support \\
 \midrule
-Healthy & 0.664 & 0.640 & 0.652 & 662.0 \\
-Stressed & 0.680 & 0.539 & 0.601 & 488.0 \\
-micro avg & 0.670 & 0.597 & 0.631 & 1150.0 \\
-macro avg & 0.672 & 0.590 & 0.626 & 1150.0 \\
-weighted avg & 0.670 & 0.597 & 0.630 & 1150.0 \\
+Healthy & 0.711 & 0.555 & 0.623 & 766 \\
+Stressed & 0.570 & 0.623 & 0.596 & 494 \\
+micro avg & 0.644 & 0.582 & 0.611 & 1260 \\
+macro avg & 0.641 & 0.589 & 0.609 & 1260 \\
+weighted avg & 0.656 & 0.582 & 0.612 & 1260 \\
 \bottomrule
 \end{tabular}
 \caption{Precision, recall and F1-score for the optimized aggregate
@@ -813,63 +842,38 @@
 
 Table~\ref{tab:model-metrics-hyp} shows precision, recall and F1-score
 for the optimized model on the same test dataset of 640 images. All of
-the metrics are significantly worse than for the non-optimized
-model. Considering that the optimized classifier performs better than
-the non-optimized version this is a surprising result. There are
-multiple possible explanations for this behavior:
-
-\begin{enumerate}
-\item The optimized classifier has worse generalizability than the
-  non-optimized version.
-\item The small difference in the \gls{map} values for the object
-  detection model result in significantly higher error rates
-  overall. This might be the case because a large number of plants is
-  not detected in the first place and/or those which are detected are
-  more often not classified correctly by the classifier. As mentioned
-  in section~\ref{ssec:yolo-hyp-opt}, running the evolution of the
-  hyper-parameters for more generations could better the performance
-  overall.
-\item The test dataset is tailored to the non-optimized version and
-  does not provide an accurate measure of real-world performance. The
-  test dataset was labeled by running the individual models on the
-  images and taking the predicted bounding boxes and labels as a
-  starting point for the labeling process. If the labels were not
-  rigorously corrected, the dataset will allow the non-optimized model
-  to achieve high scores because the labels are already in line with
-  what it predicts. Conversely, the optimized model might get closer
-  to the actual ground truth, but that truth is not what is specified
-  by the labels to begin with. If that is the case, the evaluation of
-  the non-optimized model is too favorably and should be corrected
-  down.
-\end{enumerate}
-
-Of these three possibilities, the second and third points are the most
-likely culprits. The first scenario is unlikely because the optimized
-classifier has been evaluated in a cross validation setting and the
-results do not lend themselves easily to such an
-interpretation. Dealing with the second scenario could allow the
-object detection model to perform better on its own, but would
-probably not explain the big difference in performance. Scenario three
-is the most likely one because the process of creating the test
-dataset can lead to favorable labels for the non-optimized model.
+the metrics are better for the optimized model. In particular,
+precision for the healthy class could be improved significantly while
+recall remains at the same level. This results in a better F1-score
+for the healthy class. Precision for the stressed class is lower with
+the optimized model, but recall is significantly higher (0.502
+vs. 0.623). The higher recall results in a 3\% gain for the F1-score
+in the stressed class. Overall, precision is the same but recall has
+improved significantly, which also results in a noticeable improvement
+for the average F1-score across both classes.
 
 \begin{figure}
 \centering
-\includegraphics{graphics/APmodel-final.pdf}
+\includegraphics{graphics/APmodel-model-original-relabeled.pdf}
 \caption[Optimized aggregate model AP@0.5 and
   AP@0.95.]{Precision-recall curves for \gls{iou} thresholds of 0.5
   and 0.95. The \gls{ap} of a specific threshold is defined as the
   area under the precision-recall curve of that threshold. The
   \gls{map} across \gls{iou} thresholds from 0.5 to 0.95 in 0.05
-  steps \textsf{mAP}@0.5:0.95 is 0.4426.}
+  steps \textsf{mAP}@0.5:0.95 is 0.3838.}
 \label{fig:aggregate-ap-hyp}
 \end{figure}
 
-Figure~\ref{fig:aggregate-ap-hyp} confirms the suspicions raised by
-the lower metrics from table~\ref{tab:model-metrics-hyp}. More
-iterations for the evolution of the object detection model would
-likely have a significant effect on \gls{iou} and the confidence
-values associated with the bounding boxes.
+Figure~\ref{fig:aggregate-ap-hyp} confirms the performance increase of
+the optimized model established in
+table~\ref{tab:model-metrics-hyp}. The \textsf{mAP}@0.5 is higher for
+both classes, indicating that the model better detects plants in
+general. The \textsf{mAP}@0.95 is slightly lower for the healthy
+class, which means that the confidence for the healthy class is
+slightly lower compared to the non-optimized model. The result is that
+more plants are correctly detected and classified overall, but the
+confidence scores tend to be lower with the optimized model. The
+\textsf{mAP}@0.5:0.95 could be improved by about 0.025.
 
 \backmatter
@@ -898,4 +902,7 @@
 %%% Local Variables:
 %%% mode: latex
 %%% TeX-master: t
+%%% TeX-master: t
+%%% TeX-master: t
+%%% TeX-master: "thesis"
 %%% End: