Add deployment section to implementation

Tobias Eidelpes 2023-12-08 17:23:31 +01:00
parent 6267db9485
commit 326562ca85
3 changed files with 157 additions and 93 deletions

View File

@ -241,6 +241,17 @@
pages = {399--402}
}
@online{chan2020,
title = {Healthy and {{Wilted Houseplant Images}}},
author = {Chan, Russell},
date = {2020-01-17},
url = {https://www.kaggle.com/datasets/russellchan/healthy-and-wilted-houseplant-images},
urldate = {2023-12-08},
abstract = {A collection of 904 houseplant images, classified as either healthy or wilted},
langid = {english},
file = {/home/zenon/Zotero/storage/KDVV3SVG/healthy-and-wilted-houseplant-images.html}
}
@article{chandel2021,
title = {Identifying {{Crop Water Stress Using Deep Learning Models}}},
author = {Chandel, Narendra Singh and Chakraborty, Subir Kumar and Rajwade, Yogesh Anand and Dubey, Kumkum and Tiwari, Mukesh K. and Jat, Dilip},
@ -1754,6 +1765,30 @@
file = {/home/zenon/Zotero/storage/CLHDBTJ2/qWPwnQEACAAJ.html}
}
@online{zotero-368,
title = {Dataset {{Search}}},
url = {https://datasetsearch.research.google.com/search?src=2&query=Healthy%20and%20Wilted%20Houseplant%20Images&docid=L2cvMTFzc3JqZDhrNA%3D%3D},
urldate = {2023-12-08},
file = {/home/zenon/Zotero/storage/48CAYZMW/search.html}
}
@online{zotero-372,
title = {Healthy and {{Wilted Houseplant Images}}},
url = {https://www.kaggle.com/datasets/russellchan/healthy-and-wilted-houseplant-images},
urldate = {2023-12-08},
abstract = {A collection of 904 houseplant images, classified as either healthy or wilted},
langid = {english},
file = {/home/zenon/Zotero/storage/2EDXR4MQ/datasets.html}
}
@software{zotero-374,
title = {Open {{Neural Network Exchange}}},
url = {https://github.com/onnx},
urldate = {2023-12-08},
abstract = {ONNX is an open ecosystem for interoperable AI models. It's a community project: we welcome your contributions! - Open Neural Network Exchange},
file = {/home/zenon/Zotero/storage/GZ35DHBG/onnx.html}
}
@article{zou2023,
title = {Object {{Detection}} in 20 {{Years}}: {{A Survey}}},
shorttitle = {Object {{Detection}} in 20 {{Years}}},

Binary file not shown.

View File

@ -140,6 +140,7 @@ Challenge}
\newacronym{giou}{GIoU}{Generalized Intersection over Union}
\newacronym{elan}{ELAN}{Efficient Layer Aggregation Network}
\newacronym{eelan}{E-ELAN}{Extended Efficient Layer Aggregation Network}
\newacronym{onnx}{ONNX}{Open Neural Network Exchange}
\begin{document}
@ -2460,7 +2461,7 @@ detector was trained with looks like, what the results of the training
phase are and how the model was optimized with respect to its
hyperparameters.
\subsection{Data Set}
\subsection{Dataset}
\label{ssec:obj-train-dataset}
The object detection model has to correctly detect plants in various
@ -2692,13 +2693,16 @@ sections we describe the data set the classifier was trained on, the
metrics of the training phase and how the performance of the model was
further improved with hyperparameter optimization.
\subsection{Data Set}
\subsection{Dataset}
\label{ssec:class-train-dataset}
The dataset we used for training the classifier consists of \num{452}
images of healthy and \num{452} stressed plants.
%% TODO: write about data set
images of healthy and \num{452} stressed plants. It has been made
publicly available on Kaggle
Datasets\footnote{\url{https://www.kaggle.com/datasets}} under the
name \emph{Healthy and Wilted Houseplant Images} \cite{chan2020}. The
images in the dataset were collected from Google Images and labeled
as either healthy or wilted.
The dataset was split 85/15 into training and validation sets. The
images in the training set were augmented with a random crop to arrive
@ -2761,17 +2765,18 @@ which is hyperparameter optimization \cite{bergstra2012}.
eps & 0.00000001, 0.1, 1 \\
\bottomrule
\end{tabular}
\caption{Hyper-parameters and their possible values during
\caption{Hyperparameters and their possible values during
optimization.}
\label{tab:classifier-hyps}
\end{table}
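As a quick sanity check of the iteration count discussed next
(assuming \eqref{eq:opt-prob} is the usual random-search coverage
bound): with $n$ uniformly sampled configurations, the probability
that at least one of them lies in the top 1\% is
\begin{equation*}
  P(n) = 1 - (1 - 0.01)^{n}, \qquad
  P(138) = 1 - 0.99^{138} \approx 0.75.
\end{equation*}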
The random search was run for 138 iterations which equates to a 75\%
probability that the best solution lies within 1\% of the theoretical
The random search was run for \num{138} iterations, which corresponds
to a 75\% probability that the best solution lies within 1\% of the
theoretical
maximum~\eqref{eq:opt-prob}. Figure~\ref{fig:classifier-hyp-results}
shows three of the eight parameters and their impact on achieving a high
$\mathrm{F}_1$-score. \gls{sgd} has less variation in its results than
Adam~\cite{kingma2017} and manages to provide eight out of the ten
Adam \cite{kingma2017} and manages to provide eight out of the ten
best results. The number of epochs to train for was chosen based on
the observation that almost all configurations converge well before
reaching the tenth epoch. The assumption that a training run with ten
@ -2786,38 +2791,39 @@ figure~\ref{fig:classifier-training-metrics}.
\begin{figure}
\centering
\includegraphics{graphics/classifier-hyp-metrics.pdf}
\caption[Classifier hyper-parameter optimization results.]{This
figure shows three of the eight hyper-parameters and their
performance measured by the $\mathrm{F}_1$-score during 138
\caption[Classifier hyperparameter optimization results.]{This
figure shows three of the eight hyperparameters and their
performance measured by the $\mathrm{F}_1$-score during \num{138}
trials. Differently colored markers show the batch size, with
darker colors representing larger batch sizes. The type of marker
(circle or cross) shows which optimizer was used. The x-axis shows
the learning rate on a logarithmic scale. In general, a learning
rate between 0.003 and 0.01 results in more robust and better
$\mathrm{F}_1$-scores. Larger batch sizes more often lead to
better performance as well. As for the type of optimizer,
\gls{sgd} produced the best iteration with an $\mathrm{F}_1$-score
of 0.9783. Adam tends to require more customization of its
parameters than \gls{sgd} to achieve good results.}
(circle or cross) shows which optimizer was used. The $x$-axis
shows the learning rate on a logarithmic scale. In general, a
learning rate between \num{0.003} and \num{0.01} results in more
robust and better $\mathrm{F}_1$-scores. Larger batch sizes more
often lead to better performance as well. As for the type of
optimizer, \gls{sgd} produced the best iteration with an
$\mathrm{F}_1$-score of \num{0.9783}. Adam tends to require more
customization of its parameters than \gls{sgd} to achieve good
results.}
\label{fig:classifier-hyp-results}
\end{figure}
Table~\ref{tab:classifier-final-hyps} lists the final hyper-parameters
Table~\ref{tab:classifier-final-hyps} lists the final hyperparameters
which were chosen to train the improved model. To confirm that the
model neither overfits nor merely benefits from a coincidentally
advantageous train/test split, we perform stratified $10$-fold cross
validation on the dataset. Each
fold contains 90\% training and 10\% test data and was trained for 25
epochs. Figure~\ref{fig:classifier-hyp-roc} shows the performance of
the epoch with the highest $\mathrm{F}_1$-score of each fold as
measured against the test split. The mean \gls{roc} curve provides a
robust metric for a classifier's performance because it averages out
the variability of the evaluation. Each fold manages to achieve at
least an \gls{auc} of 0.94, while the best fold reaches 0.98. The mean
\gls{roc} has an \gls{auc} of 0.96 with a standard deviation of
0.02. These results indicate that the model is accurately predicting
the correct class and is robust against variations in the training
set.
fold contains 90\% training and 10\% test data and was trained for
\num{25} epochs. Figure~\ref{fig:classifier-hyp-roc} shows the
performance of the epoch with the highest $\mathrm{F}_1$-score of each
fold as measured against the test split. The mean \gls{roc} curve
provides a robust metric for a classifier's performance because it
averages out the variability of the evaluation. Each fold manages to
achieve at least an \gls{auc} of \num{0.94}, while the best fold
reaches \num{0.99}. The mean \gls{roc} has an \gls{auc} of \num{0.96}
with a standard deviation of \num{0.02}. These results indicate that
the model is accurately predicting the correct class and is robust
against variations in the training set.
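A minimal sketch of this evaluation protocol, assuming scikit-learn
for the stratified splits and the \gls{auc} computation;
\texttt{load\_dataset}, \texttt{train} and \texttt{predict\_scores}
are hypothetical stand-ins for the actual data loading, training loop
and classifier outputs:
\begin{verbatim}
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold

X, y = load_dataset()  # hypothetical helper returning arrays

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
aucs = []
for train_idx, test_idx in skf.split(X, y):
    # Each fold: 90% training, 10% test, trained for 25 epochs.
    model = train(X[train_idx], y[train_idx], epochs=25)
    scores = predict_scores(model, X[test_idx])
    aucs.append(roc_auc_score(y[test_idx], scores))

print(f"mean AUC {np.mean(aucs):.2f} +/- {np.std(aucs):.2f}")
\end{verbatim}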
\begin{table}
\centering
@ -2828,8 +2834,8 @@ set.
\gls{sgd} & 64 & 0.01 & 5\\
\bottomrule
\end{tabular}
\caption[Hyper-parameters for the optimized classifier.]{Chosen
hyper-parameters for the final, improved model. The difference to
\caption[Hyperparameters for the optimized classifier.]{Chosen
hyperparameters for the final, improved model. The difference to
the parameters listed in Table~\ref{tab:classifier-hyps} comes as
a result of choosing \gls{sgd} over Adam. The missing four
parameters are only required for Adam and not \gls{sgd}.}
@ -2839,16 +2845,16 @@ set.
\begin{figure}
\centering
\includegraphics{graphics/classifier-hyp-folds-roc.pdf}
\caption[Mean \gls{roc} and variability of hyper-parameter-optimized
\caption[Mean \gls{roc} and variability of hyperparameter-optimized
model.]{This plot shows the \gls{roc} curve for the epoch with the
highest $\mathrm{F}_1$-score of each fold as well as the
\gls{auc}. To get a less variable performance metric of the
classifier, the mean \gls{roc} curve is shown as a thick line and
the variability is shown in gray. The overall mean \gls{auc} is
0.96 with a standard deviation of 0.02. The best-performing fold
reaches an \gls{auc} of 0.99 and the worst an \gls{auc} of
0.94. The black dashed line indicates the performance of a
classifier which picks classes at random
\num{0.96} with a standard deviation of \num{0.02}. The
best-performing fold reaches an \gls{auc} of \num{0.99} and the
worst an \gls{auc} of \num{0.94}. The black dashed line indicates
the performance of a classifier which picks classes at random
($\mathrm{\gls{auc}} = 0.5$). The shapes of the \gls{roc} curves
show that the classifier performs well and is robust against
variations in the training set.}
@ -2862,12 +2868,12 @@ during testing gives insight into when the model tries to increase its
performance during training at the expense of
generalizability. Figure~\ref{fig:classifier-hyp-folds} shows the
$\mathrm{F}_1$-scores of each epoch and fold. The classifier converges
quickly to 1 for the training set at which point it experiences a
slight drop in generalizability. Training the model for at most five
quickly to \num{1} for the training set, at which point it
experiences a slight drop in generalizability. Training the model for
at most five
epochs is sufficient because there are generally no improvements
afterwards. The best-performing epoch for each fold is between the
second and fourth epoch, which is just before the model achieves an
$\mathrm{F}_1$-score of 1 on the training set.
$\mathrm{F}_1$-score of \num{1} on the training set.
\begin{figure}
\centering
@ -2875,34 +2881,57 @@ $\mathrm{F}_1$-score of 1 on the training set.
\caption[$\mathrm{F}_1$-score of stratified $10$-fold cross
validation.]{These plots show the $\mathrm{F}_1$-score during
training as well as testing for each of the folds. The classifier
converges to 1 by the third epoch during the training phase, which
might indicate overfitting. However, the performance during
converges to \num{1} by the third epoch during the training phase,
which might indicate overfitting. However, the performance during
testing increases until epoch three in most cases and then
stabilizes at approximately 2-3\% lower than the best epoch. We
believe that the third, or in some cases fourth, epoch is
detrimental to performance and results in overfitting, because the
model achieves an $\mathrm{F}_1$-score of 1 for the training set,
but that gain does not transfer to the test set. Early stopping
during training alleviates this problem.}
stabilizes at approximately 2--3 percentage points lower than the
best epoch. We believe that the third, or in some cases fourth,
epoch is detrimental to performance and results in overfitting,
because the model achieves an $\mathrm{F}_1$-score of \num{1} for
the training set, but that gain does not transfer to the test
set. Early stopping during training alleviates this problem.}
\label{fig:classifier-hyp-folds}
\end{figure}
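The early stopping mentioned in the caption above can be realized in
a few lines. The following is a sketch under the assumption of a
PyTorch-style training loop; \texttt{train\_one\_epoch} and
\texttt{evaluate\_f1} are hypothetical helpers, and \texttt{model},
\texttt{train\_loader} and \texttt{val\_loader} are assumed to exist:
\begin{verbatim}
import copy

best_f1, best_state = 0.0, None
patience, bad_epochs = 2, 0
for epoch in range(10):
    train_one_epoch(model, train_loader)  # hypothetical helper
    f1 = evaluate_f1(model, val_loader)   # hypothetical helper
    if f1 > best_f1:
        # Keep a copy of the best weights seen so far.
        best_f1 = f1
        best_state = copy.deepcopy(model.state_dict())
        bad_epochs = 0
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            break  # no improvement for two epochs: stop early
model.load_state_dict(best_state)
\end{verbatim}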
\section{Deployment}
Describe the Jetson Nano, how the model is deployed to the device and
how it reports its results (REST API).
After training the two models (object detector and classifier), we
export them to the \gls{onnx}\footnote{\url{https://github.com/onnx}}
format and move the model files to the Nvidia Jetson Nano. On the
device, a Flask application (\emph{server}) provides a \gls{rest}
endpoint from which the results of the most recent prediction can be
queried. The server periodically performs the following steps (a code
sketch follows at the end of this section):
Estimated 2 pages for this section.
\begin{enumerate}
\item Call a binary which captures an image and writes it to a file.
\item Run the two models on the image to detect all plants as well
  as their status.
\item Draw the returned bounding boxes onto the original image.
\item Number each detection from left to right.
\item Coerce the prediction for each bounding box into a tuple
$\langle I, S, T, \Delta T \rangle$.
\item Store the image with the bounding boxes and an array of all
tuples (predictions) in a dictionary.
\item Wait two minutes.
\item Go to step one.
\end{enumerate}
The binary uses Nvidia's accelerated GStreamer implementation to
capture an image. The tuple $\langle I, S, T, \Delta T \rangle$
consists of the following items: $I$ is the number of the bounding
box in the image, $S$ the current state on a scale from one to ten,
$T$ the timestamp of the prediction, and $\Delta T$ the time since
the state $S$ last fell below three. The server performs these tasks
asynchronously in the background and is always ready to respond to
requests with the most recent prediction.
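The following is a minimal sketch of this loop, not the exact
implementation: it assumes the models are loaded with ONNX Runtime
and served with Flask, and the \texttt{capture} binary's interface as
well as the \texttt{detect} and \texttt{classify} helpers are
illustrative assumptions.
\begin{verbatim}
import subprocess
import threading
import time
from datetime import datetime

import onnxruntime as ort
from flask import Flask, jsonify

app = Flask(__name__)

# Models exported beforehand, e.g. with torch.onnx.export(...).
detector = ort.InferenceSession("detector.onnx")
classifier = ort.InferenceSession("classifier.onnx")

state = {"image": None, "predictions": []}
last_below_three = {}  # box index -> timestamp of last S < 3

def capture(path="frame.jpg"):
    # Wraps Nvidia's accelerated GStreamer pipeline; the binary's
    # name and interface are assumptions.
    subprocess.run(["./capture", path], check=True)
    return path

def detect(path):
    # Hypothetical: preprocess the image, call
    # detector.run(None, {...}) and decode the output into
    # (x_min, y_min, x_max, y_max) boxes.
    return []

def classify(path, box):
    # Hypothetical: crop the box, call classifier.run(None, {...})
    # and map the output to a state S from one to ten.
    return 10

def predict(path):
    results = []
    # Number detections from left to right by sorting on x_min.
    for i, box in enumerate(sorted(detect(path), key=lambda b: b[0])):
        s = classify(path, box)
        now = datetime.now()
        if s < 3:
            last_below_three[i] = now
        delta_t = (now - last_below_three.get(i, now)).total_seconds()
        results.append((i, s, now.isoformat(), delta_t))
    return results

def loop():
    while True:
        path = capture()
        state["predictions"] = predict(path)
        state["image"] = path  # annotated with boxes in practice
        time.sleep(120)        # wait two minutes

@app.route("/prediction")
def prediction():
    # Always answers with the most recent prediction tuples.
    return jsonify(state["predictions"])

threading.Thread(target=loop, daemon=True).start()

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
\end{verbatim}
A client can then poll the endpoint, for example with
\texttt{curl http://<jetson>:5000/prediction}, to obtain the latest
tuples.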
\chapter{Evaluation}
\label{chap:evaluation}
The following sections contain a detailed evaluation of the model in
various scenarios. First, we present metrics from the training phases
of the constituent models. Second, we employ methods from the field of
\gls{xai} such as \gls{grad-cam} to get a better understanding of the
models' abstractions. Finally, we turn to the models' aggregate
performance on the test set.
various scenarios. First, we employ methods from the field of
\gls{xai}, such as \gls{grad-cam}, to get a better understanding of
the models' abstractions. We then turn to the models' aggregate
performance on the test set.
\section{Methodology}
\label{sec:methodology}