Finish Introduction

This commit is contained in:
Tobias Eidelpes 2023-09-20 08:36:11 +02:00
parent 2e7c669e1a
commit 94a01f81e5
3 changed files with 188 additions and 52 deletions

View File

@ -149,6 +149,23 @@
keywords = {Confusion matrix,Crop phenotyping,DCNN,Digital agriculture,Machine learning}
}
@article{davis1992,
title = {Operational Prototyping: A New Development Approach},
shorttitle = {Operational Prototyping},
author = {Davis, Alan M.},
date = {1992-09},
journaltitle = {IEEE Software},
volume = {9},
number = {5},
pages = {70--78},
issn = {1937-4194},
doi = {10.1109/52.156899},
abstract = {The two traditional types of software prototyping methods, throwaway prototyping and evolutionary prototyping, are compared, and prototyping's relation to conventional software development is discussed. Operational prototyping, a method that combines throwaway and evolutionary prototyping techniques by layering a rapid prototype over a solid evolutionary base, is described. Operational prototyping's implications for configuration management, quality assurance, and general project management are reviewed. The application of operational prototyping to a prototype ocean surveillance terminal is presented.},
keywords = {Application software,Oceans,Programming,Project management,Prototypes,Quality assurance,Quality management,Software prototyping,Solids,Surveillance},
file = {/home/zenon/Zotero/storage/7NBJW3VE/Davis - 1992 - Operational prototyping a new development approac.pdf;/home/zenon/Zotero/storage/N96N3CIA/156899.html}
}
@inproceedings{deng2009,
title = {{{ImageNet}}: {{A Large-Scale Hierarchical Image Database}}},
shorttitle = {{{ImageNet}}},
@ -162,6 +179,24 @@
keywords = {Explosions,Image databases,Image retrieval,Information retrieval,Internet,Large-scale systems,Multimedia databases,Ontologies,Robustness,Spine}
}
@article{everingham2010,
title = {The {Pascal Visual Object Classes} ({VOC}) {Challenge}},
author = {Everingham, Mark and Van Gool, Luc and Williams, Christopher K. I. and Winn, John and Zisserman, Andrew},
date = {2010-06-01},
journaltitle = {International Journal of Computer Vision},
shortjournal = {Int J Comput Vis},
volume = {88},
number = {2},
pages = {303--338},
issn = {1573-1405},
doi = {10.1007/s11263-009-0275-4},
urldate = {2023-09-07},
abstract = {The Pascal Visual Object Classes (VOC) challenge is a benchmark in visual object category recognition and detection, providing the vision and machine learning communities with a standard dataset of images and annotation, and standard evaluation procedures. Organised annually from 2005 to present, the challenge and its associated dataset has become accepted as the benchmark for object detection.},
langid = {english},
keywords = {Benchmark,Database,Object detection,Object recognition},
file = {/home/zenon/Zotero/storage/FCRT6NYG/Everingham et al. - 2010 - The Pascal Visual Object Classes (VOC) Challenge.pdf}
}
@inproceedings{he2016,
title = {Deep {{Residual Learning}} for {{Image Recognition}}},
booktitle = {2016 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
@ -211,7 +246,6 @@
pages = {1200977},
issn = {2624-8212},
doi = {10.3389/frai.2023.1200977},
url = {http://arxiv.org/abs/2211.02972},
urldate = {2023-08-25},
abstract = {Machine learning tasks often require a significant amount of training data for the resultant network to perform suitably for a given problem in any domain. In agriculture, dataset sizes are further limited by phenotypical differences between two plants of the same genotype, often as a result of differing growing conditions. Synthetically-augmented datasets have shown promise in improving existing models when real data is not available. In this paper, we employ a contrastive unpaired translation (CUT) generative adversarial network (GAN) and simple image processing techniques to translate indoor plant images to appear as field images. While we train our network to translate an image containing only a single plant, we show that our method is easily extendable to produce multiple-plant field images. Furthermore, we use our synthetic multi-plant images to train several YoloV5 nano object detection models to perform the task of plant detection and measure the accuracy of the model on real field data images. Including training data generated by the CUT-GAN leads to better plant detection performance compared to a network trained solely on real data.},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
@ -260,7 +294,6 @@
eprintclass = {cs},
pages = {21--37},
doi = {10.1007/978-3-319-46448-0_2},
url = {http://arxiv.org/abs/1512.02325},
urldate = {2023-08-24},
abstract = {We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For \$300\textbackslash times 300\$ input, SSD achieves 72.1\% mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for \$500\textbackslash times 500\$ input, SSD achieves 75.1\% mAP, outperforming a comparable state of the art Faster R-CNN model. Code is available at https://github.com/weiliu89/caffe/tree/ssd .},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
@ -367,13 +400,26 @@
pages = {970},
issn = {2223-7747},
doi = {10.3390/plants11070970},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9003083/},
urldate = {2023-08-25},
abstract = {Plant stress is one of the most significant factors affecting plant fitness and, consequently, food production. However, plant stress may also be profitable since it behaves hormetically; at low doses, it stimulates positive traits in crops, such as the synthesis of specialized metabolites and additional stress tolerance. The controlled exposure of crops to low doses of stressors is therefore called hormesis management, and it is a promising method to increase crop productivity and quality. Nevertheless, hormesis management has severe limitations derived from the complexity of plant physiological responses to stress. Many technological advances assist plant stress science in overcoming such limitations, which results in extensive datasets originating from the multiple layers of the plant defensive response. For that reason, artificial intelligence tools, particularly Machine Learning (ML) and Deep Learning (DL), have become crucial for processing and interpreting data to accurately model plant stress responses such as genomic variation, gene and protein expression, and metabolite biosynthesis. In this review, we discuss the most recent ML and DL applications in plant stress science, focusing on their potential for improving the development of hormesis management protocols.},
pmcid = {PMC9003083},
file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
}
@incollection{sears2007,
title = {Prototyping {Tools} and {Techniques}},
booktitle = {The {Human-Computer Interaction Handbook}},
editor = {Sears, Andrew and Jacko, Julie A.},
date = {2007-09-19},
pages = {1043--1066},
publisher = {CRC Press},
doi = {10.1201/9781410615862-66},
urldate = {2023-09-17},
abstract = {We begin with our definition of a prototype and then discuss prototypes as design artifacts, introducing four dimensions for analyzing them. We then discuss the role of prototyping within the design process, in particular the concept of a design space, and how it is expanded and contracted by generating and selecting design ideas. The next three sections describe specific prototyping approaches: Rapid prototyping, both off-line and on-line, for early stages of design, iterative prototyping, which uses on-line development tools, and evolutionary prototyping, which must be based on a sound software architecture.},
isbn = {978-0-429-16397-5},
langid = {english}
}
@article{selvaraju2020,
title = {Grad-{{CAM}}: {{Visual Explanations}} from {{Deep Networks}} via {{Gradient-based Localization}}},
shorttitle = {Grad-{{CAM}}},

Binary file not shown.

View File

@ -85,6 +85,11 @@
\newacronym{sgd}{SGD}{Stochastic Gradient Descent}
\newacronym{roc}{ROC}{Receiver Operating Characteristic}
\newacronym{auc}{AUC}{Area Under the Curve}
\newacronym{coco}{COCO}{Common Objects in Context}
\newacronym{pascal-voc}{\textsc{PASCAL} VOC}{\textsc{PASCAL} Visual Object Classes}
\newacronym{sbc}{SBC}{single-board computer}
\newacronym{api}{API}{Application Programming Interface}
\newacronym{rest}{REST}{Representational State Transfer}
\begin{document}
@ -168,7 +173,9 @@ installation which is deployable by homeowners, gathers data using
readily available hardware and performs computation on the device
without a connection to a central server. The device should be able to
visually determine whether the plants in its field of view need water
or not and output its recommendation.
or not and output its recommendation. The recommendation should then
be used as a data point based on which homeowners can automatically
water their plants with an automated watering system.
The aim of this work is to develop a prototype which can be deployed
by gardeners to survey plants and recommend watering or not. To this
@ -177,14 +184,13 @@ plants in the field of view and then to determine if the plants need
water or not. The model should be suitable for edge devices equipped
with a TPU or GPU but with otherwise limited processing
capabilities. Examples of such systems include Google's Coral
development board and the Nvidia Jetson series of single-board
computers (SBCs). The model should make use of state-of-the-art
algorithms from either classical machine learning or deep
learning. The literature review will yield an appropriate machine
learning method. Furthermore, the adaptation of existing models
(transfer learning) for object detection to the domain of plant
recognition may provide higher performance than would otherwise be
achievable within the time constraints.
development board and the Nvidia Jetson series of~\glspl{sbc}. The
model should make use of state-of-the-art algorithms from either
classical machine learning or deep learning. The literature review
will yield an appropriate machine learning method. Furthermore, the
adaptation of existing models (transfer learning) for object detection
to the domain of plant recognition may provide higher performance than
would otherwise be achievable within the time constraints.
The model will be deployed to the single-board computer and evaluated
using established and well-known metrics from the field of machine
@ -245,44 +251,122 @@ learning. The evaluation will seek to answer the following questions:
The methodological approach consists of the following steps:
\begin{description}
\item[Literature Review] The literature review informs the type of
machine learning methods which are later applied during the
\begin{enumerate}
\item \textbf{Literature Review}: The literature review informs the
type of machine learning methods which are later applied during the
implementation of the prototype.
\item[Object Detection] Flowers present in the image will be detected
using object detection methods. These methods will draw bounding
boxes around the objects of interest. The output is fed into the
next stage.
\item[State Classification] The bounded images will be fed to a
classifier which will determine whether the plant needs water or
not.
\item[Deployment to SBC] The software prototype will be deployed to
the single-board computer.
\item[Evaluation] The prototype will be evaluated to determine its
feasibility and performance. During evaluation the author seeks to
provide a basis for answering the research questions.
\end{description}
\item \textbf{Dataset Curation}: After selecting the methods to use
for the implementation, we have to create our own dataset or use
existing ones, depending on availability.
\item \textbf{Model Training}: The selected models will be trained
with the datasets curated in the previous step.
\item \textbf{Optimization}: The selected models will be optimized
with respect to their parameters.
\item \textbf{Deployment to SBC}: The software prototype will be
deployed to the single-board computer.
\item \textbf{Evaluation}: The models will be evaluated extensively
and compared to other state-of-the-art systems. During evaluation,
the author seeks to provide a basis for answering the research
questions.
\end{enumerate}
% TODO(author note, addressed below): go into detail about how the
% literature was selected to be relevant for the decisions underlying
% the choice of models/algorithms. Mention how literature in general
% was found (search terms, platforms, etc.).
During the literature review, the search is centered around the terms
\emph{plant classification}, \emph{plant state classification},
\emph{plant detection}, \emph{water stress detection}, \emph{machine
learning agriculture}, \emph{crop machine learning} and \emph{remote
sensing}. These terms provide a solid basis for understanding the
state of the art in plant detection and stress classification. We will
use multiple search engines such as Google Scholar, Semantic Scholar,
the ACM Digital Library, and IEEE Xplore. It is common to only publish
research papers in preprint form in the data science and machine
learning fields. For this reason, we will also reference arXiv.org for
these papers. The work discovered in this way will also lead to
further insights about the type of models which are commonly used.
In order to find and select appropriate datasets to train the models
on, we will survey the existing big datasets for classes we can
use. Datasets such as \gls{coco}~\cite{lin2015} and
\gls{pascal-voc}~\cite{everingham2010} contain the highly relevant
class \emph{Potted Plant}. By extracting only these classes from
multiple datasets and concatenating them together, it is possible to
create one unified dataset which only contains the classes necessary
for training the model.
The training of the models will happen in an environment where more
computational resources are available than what the~\gls{sbc}
offers. We will deploy the final model with the~\gls{api} to
the~\gls{sbc} after training and optimization. Furthermore, training
will happen in tandem with a continuous evaluation process. After
every iteration of the model, an evaluation run against the test set
determines if there has been an improvement in performance. The
results of the evaluation feed back into the parameter selection at
the beginning of each training phase. Small changes to the training
parameters, augmentations or structure of the model are followed by
another test phase. The iterative nature of the development of the
prototype increases the likelihood that the model's performance is not
only locally maximal but also as close as possible to the global
maximum.
In the final evaluation phase, we will measure the resulting model
against the test set and evaluate its performance with common
metrics. The aim is to first provide a solid basis of facts regarding
the model(s). Second, the results will be discussed in detail. Third,
we will cross-check the results with the hypotheses from
section~\ref{sec:motivation} and determine whether the aim of the work
has been met, and—if not—give reasons for the rejection of all or part
of the hypotheses.
Overall, the development of our application follows an evolutionary
prototyping process~\cite{davis1992,sears2007}. Instead of producing a
full-fledged product from the start, development happens iteratively
in phases. The main phases and their order for the prototype at hand
are: model selection, implementation, and evaluation. The results of
each phase—for example, which model has been selected—inform the
decisions which have to be made in the next phase (implementation). In
other words, every subsequent phase is dependent on the results of the
previous phase. All three phases, in turn, constitute one iteration
within the prototyping process. At the start of the next prototype,
the results of the previous iteration determine the path forward.
The decision to use an evolutionary prototyping process follows in
large part from the problem to be solved (as specified in
section~\ref{sec:motivation}). Since the critical requirements have
been established from the start, it is possible to build a solid
prototype from the beginning by implementing only those features which
are well-understood. The aim is to allow the developer to explore the
problem further so that additional requirements which arise during
development can be incorporated properly.
The prototyping process is embedded within the concepts of the
\emph{Scientific Method}. This thesis not only produces a prototype,
but also explores the problem of plant detection and classification
scientifically. Exploration of the problem requires making falsifiable
hypotheses (see section~\ref{sec:motivation}), gathering empirical
evidence (see section~\ref{sec:results}), and accepting or rejecting
the initial hypotheses (see section~\ref{sec:discussion}). Empirical
evidence is provided by measuring the model(s) against out-of-sample
test sets. This provides the necessary foundation for acceptance or
rejection of the hypotheses.
\section{Thesis Structure}
\label{sec:structure}
The first part of the thesis (chapter~\ref{chap:background}) contains
the theoretical basis of the models which we use for the
prototype. Chapter~\ref{chap:design} goes into detail about the design
of the prototype, the construction of the training/test sets and how
the prototype reports its results via its REST
API. Chapter~\ref{chap:evaluation} shows the results of the testing
phases as well as the performance of the aggregate model. Furthermore,
the results are compared with the expectations and it is discussed
whether they are explainable in the context of the task at hand as
well as benchmark results from other datasets
(COCO). Chapter~\ref{chap:conclusion} concludes the thesis with an
outlook on further research questions and possible improvements.
prototype. Chapter~\ref{chap:design} goes into detail about the
requirements for the prototype, the overall design and architecture of
the recognition and classification pipeline, and the structure and
unique properties of the selected
models. Chapter~\ref{chap:implementation} expands on how the datasets
are used during training as well as how the prototype publishes its
classification results. Chapter~\ref{chap:evaluation} shows the
results of the testing phases as well as the performance of the
aggregate model. Furthermore, the results are compared with the
expectations and it is discussed whether they are explainable in the
context of the task at hand as well as benchmark results from other
datasets (\gls{coco}~\cite{lin2015}). Chapter~\ref{chap:conclusion}
concludes the thesis with a summary and an outlook on possible
improvements and further research questions.
\chapter{Theoretical Background}
\label{chap:background}
@ -553,16 +637,17 @@ loop on the prototype in Figure~\ref{fig:setup}.
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{graphics/setup.pdf}
\caption{Methodological approach for the prototype. The prototype
will run in a loop which starts at the top left corner. First, the
camera attached to the prototype takes images of plants. These
images are passed to the models running on the prototype. The
first model generates bounding boxes for all detected plants. The
bounding boxes are used to cut out the individual plants and pass
them to the state classifier in sequence. The classifier outputs a
probability score indicating the amount of stress the plant is
experiencing. After a set amount of time, the camera takes a
picture again and the process continues indefinitely.}
\caption[Methodological approach for the prototype.]{Methodological
approach for the prototype. The prototype will run in a loop which
starts at the top left corner. First, the camera attached to the
prototype takes images of plants. These images are passed to the
models running on the prototype. The first model generates
bounding boxes for all detected plants. The bounding boxes are
used to cut out the individual plants and pass them to the state
classifier in sequence. The classifier outputs a probability score
indicating the amount of stress the plant is experiencing. After a
set amount of time, the camera takes a picture again and the
process continues indefinitely.}
\label{fig:setup}
\end{figure}
@ -1457,4 +1542,9 @@ Estimated 1 page for this section
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: