Finish Introduction

This commit is contained in:
Tobias Eidelpes 2023-09-20 08:36:11 +02:00
parent 2e7c669e1a
commit 94a01f81e5
3 changed files with 188 additions and 52 deletions

View File

@ -149,6 +149,23 @@
keywords = {Confusion matrix,Crop phenotyping,DCNN,Digital agriculture,Machine learning}
}
@article{davis1992,
title = {Operational Prototyping: A New Development Approach},
shorttitle = {Operational Prototyping},
author = {Davis, Alan M.},
date = {1992-09},
journaltitle = {IEEE Software},
volume = {9},
number = {5},
pages = {70--78},
issn = {1937-4194},
doi = {10.1109/52.156899},
abstract = {The two traditional types of software prototyping methods, throwaway prototyping and evolutionary prototyping, are compared, and prototyping's relation to conventional software development is discussed. Operational prototyping, a method that combines throwaway and evolutionary prototyping techniques by layering a rapid prototype over a solid evolutionary base, is described. Operational prototyping's implications for configuration management, quality assurance, and general project management are reviewed. The application of operational prototyping to a prototype ocean surveillance terminal is presented.},
keywords = {Application software,Oceans,Programming,Project management,Prototypes,Quality assurance,Quality management,Software prototyping,Solids,Surveillance},
file = {/home/zenon/Zotero/storage/7NBJW3VE/Davis - 1992 - Operational prototyping a new development approac.pdf;/home/zenon/Zotero/storage/N96N3CIA/156899.html}
}
@inproceedings{deng2009,
title = {{{ImageNet}}: {{A Large-Scale Hierarchical Image Database}}},
shorttitle = {{{ImageNet}}},
@ -162,6 +179,24 @@
keywords = {Explosions,Image databases,Image retrieval,Information retrieval,Internet,Large-scale systems,Multimedia databases,Ontologies,Robustness,Spine}
}
@article{everingham2010,
title = {The {Pascal Visual Object Classes} ({VOC}) {Challenge}},
author = {Everingham, Mark and Van Gool, Luc and Williams, Christopher K. I. and Winn, John and Zisserman, Andrew},
date = {2010-06-01},
journaltitle = {International Journal of Computer Vision},
shortjournal = {Int J Comput Vis},
volume = {88},
number = {2},
pages = {303--338},
issn = {1573-1405},
doi = {10.1007/s11263-009-0275-4},
urldate = {2023-09-07},
abstract = {The Pascal Visual Object Classes (VOC) challenge is a benchmark in visual object category recognition and detection, providing the vision and machine learning communities with a standard dataset of images and annotation, and standard evaluation procedures. Organised annually from 2005 to present, the challenge and its associated dataset has become accepted as the benchmark for object detection.},
langid = {english},
keywords = {Benchmark,Database,Object detection,Object recognition},
file = {/home/zenon/Zotero/storage/FCRT6NYG/Everingham et al. - 2010 - The Pascal Visual Object Classes (VOC) Challenge.pdf}
}
@inproceedings{he2016,
title = {Deep {{Residual Learning}} for {{Image Recognition}}},
booktitle = {2016 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
@ -211,7 +246,6 @@
pages = {1200977},
issn = {2624-8212},
doi = {10.3389/frai.2023.1200977},
url = {http://arxiv.org/abs/2211.02972},
urldate = {2023-08-25},
abstract = {Machine learning tasks often require a significant amount of training data for the resultant network to perform suitably for a given problem in any domain. In agriculture, dataset sizes are further limited by phenotypical differences between two plants of the same genotype, often as a result of differing growing conditions. Synthetically-augmented datasets have shown promise in improving existing models when real data is not available. In this paper, we employ a contrastive unpaired translation (CUT) generative adversarial network (GAN) and simple image processing techniques to translate indoor plant images to appear as field images. While we train our network to translate an image containing only a single plant, we show that our method is easily extendable to produce multiple-plant field images. Furthermore, we use our synthetic multi-plant images to train several YoloV5 nano object detection models to perform the task of plant detection and measure the accuracy of the model on real field data images. Including training data generated by the CUT-GAN leads to better plant detection performance compared to a network trained solely on real data.},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
@ -260,7 +294,6 @@
eprintclass = {cs},
pages = {21--37},
doi = {10.1007/978-3-319-46448-0_2},
url = {http://arxiv.org/abs/1512.02325},
urldate = {2023-08-24},
abstract = {We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For \$300\textbackslash times 300\$ input, SSD achieves 72.1\% mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for \$500\textbackslash times 500\$ input, SSD achieves 75.1\% mAP, outperforming a comparable state of the art Faster R-CNN model. Code is available at https://github.com/weiliu89/caffe/tree/ssd .},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
@ -367,13 +400,26 @@
pages = {970},
issn = {2223-7747},
doi = {10.3390/plants11070970},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9003083/},
urldate = {2023-08-25},
abstract = {Plant stress is one of the most significant factors affecting plant fitness and, consequently, food production. However, plant stress may also be profitable since it behaves hormetically; at low doses, it stimulates positive traits in crops, such as the synthesis of specialized metabolites and additional stress tolerance. The controlled exposure of crops to low doses of stressors is therefore called hormesis management, and it is a promising method to increase crop productivity and quality. Nevertheless, hormesis management has severe limitations derived from the complexity of plant physiological responses to stress. Many technological advances assist plant stress science in overcoming such limitations, which results in extensive datasets originating from the multiple layers of the plant defensive response. For that reason, artificial intelligence tools, particularly Machine Learning (ML) and Deep Learning (DL), have become crucial for processing and interpreting data to accurately model plant stress responses such as genomic variation, gene and protein expression, and metabolite biosynthesis. In this review, we discuss the most recent ML and DL applications in plant stress science, focusing on their potential for improving the development of hormesis management protocols.},
pmcid = {PMC9003083},
file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
}
@incollection{sears2007,
title = {Prototyping {Tools} and {Techniques}},
booktitle = {The {Human-Computer Interaction Handbook}},
editor = {Sears, Andrew and Jacko, Julie A.},
date = {2007-09-19},
pages = {1043--1066},
publisher = {CRC Press},
doi = {10.1201/9781410615862-66},
urldate = {2023-09-17},
abstract = {We begin with our definition of a prototype and then discuss prototypes as design artifacts, introducing four dimensions for analyzing them. We then discuss the role of prototyping within the design process, in particular the concept of a design space, and how it is expanded and contracted by generating and selecting design ideas. The next three sections describe specific prototyping approaches: Rapid prototyping, both off-line and on-line, for early stages of design, iterative prototyping, which uses on-line development tools, and evolutionary prototyping, which must be based on a sound software architecture.},
isbn = {978-0-429-16397-5},
langid = {english}
}
@article{selvaraju2020,
title = {Grad-{{CAM}}: {{Visual Explanations}} from {{Deep Networks}} via {{Gradient-based Localization}}},
shorttitle = {Grad-{{CAM}}},

Binary file not shown.

View File

@ -85,6 +85,11 @@
\newacronym{sgd}{SGD}{Stochastic Gradient Descent}
\newacronym{roc}{ROC}{Receiver Operating Characteristic}
\newacronym{auc}{AUC}{Area Under the Curve}
\newacronym{coco}{COCO}{Common Objects in Context}
\newacronym{pascal-voc}{\textsc{PASCAL} VOC}{\textsc{PASCAL} Visual Object Classes}
\newacronym{sbc}{SBC}{single-board computer}
\newacronym{api}{API}{Application Programming Interface}
\newacronym{rest}{REST}{Representational State Transfer}
\begin{document}
@ -168,7 +173,9 @@ installation which is deployable by homeowners, gathers data using
readily available hardware and performs computation on the device
without a connection to a central server. The device should be able to
visually determine whether the plants in its field of view need water
or not and output its recommendation.
or not and output its recommendation. The recommendation should then
be used as a data point based on which homeowners can automatically
water their plants with an automated watering system.
The aim of this work is to develop a prototype which can be deployed
by gardeners to survey plants and recommend watering or not. To this
@ -177,14 +184,13 @@ plants in the field of view and then to determine if the plants need
water or not. The model should be suitable for edge devices equipped
with a TPU or GPU but with otherwise limited processing
capabilities. Examples of such systems include Google's Coral
development board and the Nvidia Jetson series of single-board
computers (SBCs). The model should make use of state-of-the-art
algorithms from either classical machine learning or deep
learning. The literature review will yield an appropriate machine
learning method. Furthermore, the adaptation of existing models
(transfer learning) for object detection to the domain of plant
recognition may provide higher performance than would otherwise be
achievable within the time constraints.
development board and the Nvidia Jetson series of~\glspl{sbc}. The
model should make use of state-of-the-art algorithms from either
classical machine learning or deep learning. The literature review
will yield an appropriate machine learning method. Furthermore, the
adaptation of existing models (transfer learning) for object detection
to the domain of plant recognition may provide higher performance than
would otherwise be achievable within the time constraints.
The model will be deployed to the single-board computer and evaluated
using established and well-known metrics from the field of machine
@ -245,44 +251,122 @@ learning. The evaluation will seek to answer the following questions:
The methodological approach consists of the following steps:
\begin{description}
\item[Literature Review] The literature review informs the type of
machine learning methods which are later applied during the
\begin{enumerate}
\item \textbf{Literature Review}: The literature review informs the
type of machine learning methods which are later applied during the
implementation of the prototype.
\item[Object Detection] Flowers present in the image will be detected
using object detection methods. These methods will draw bounding
boxes around the objects of interest. The output is fed into the
next stage.
\item[State Classification] The bounded images will be fed to a
classifier which will determine whether the plant needs water or
not.
\item[Deployment to SBC] The software prototype will be deployed to
the single-board computer.
\item[Evaluation] The prototype will be evaluated to determine its
feasibility and performance. During evaluation the author seeks to
provide a basis for answering the research questions.
\end{description}
\item \textbf{Dataset Curation}: After selecting the methods to use
for the implementation, we have to create our own dataset or use
existing ones, depending on availability.
\item \textbf{Model Training}: The selected models will be trained
with the datasets curated in the previous step.
\item \textbf{Optimization}: The selected models will be optimized
with respect to their parameters.
\item \textbf{Deployment to SBC}: The software prototype will be
deployed to the single-board computer.
\item \textbf{Evaluation}: The models will be evaluated extensively
and compared to other state-of-the-art systems. During evaluation,
the author seeks to provide a basis for answering the research
questions.
\end{enumerate}
% TODO(author note, addressed below): go into detail about how the
% literature was selected to be relevant for the decisions underlying
% the choice of models/algorithms. Mention how literature in general
% was found (search terms, platforms, etc.).
During the literature review, the search is centered around the terms
\emph{plant classification}, \emph{plant state classification},
\emph{plant detection}, \emph{water stress detection}, \emph{machine
learning agriculture}, \emph{crop machine learning} and \emph{remote
sensing}. These terms provide a solid basis for understanding the
state of the art in plant detection and stress classification. We will
use multiple search engines such as Google Scholar, Semantic Scholar,
the ACM Digital Library, and IEEE Xplore. It is common to only publish
research papers in preprint form in the data science and machine
learning fields. For this reason, we will also reference arXiv.org for
these papers. The work discovered in this way will also lead to
further insights about the type of models which are commonly used.
In order to find and select appropriate datasets to train the models
on, we will survey the existing big datasets for classes we can
use. Datasets such as \gls{coco}~\cite{lin2015} and
\gls{pascal-voc}~\cite{everingham2010} contain the highly relevant
class \emph{Potted Plant}. By extracting only these classes from
multiple datasets and concatenating them together, it is possible to
create one unified dataset which only contains the classes necessary
for training the model.
The training of the models will happen in an environment where more
computational resources are available than what the~\gls{sbc}
offers. We will deploy the final model with the~\gls{api} to
the~\gls{sbc} after training and optimization. Furthermore, training
will happen in tandem with a continuous evaluation process. After
every iteration of the model, an evaluation run against the test set
determines if there has been an improvement in performance. The
results of the evaluation feed back into the parameter selection at
the beginning of each training phase. Small changes to the training
parameters, augmentations or structure of the model are followed by
another test phase. The iterative nature of the development of the
prototype increases the likelihood that the model's performance is not
only locally maximal but also as close as possible to the global
maximum.
In the final evaluation phase, we will measure the resulting model
against the test set and evaluate its performance with common
metrics. The aim is to first provide a solid basis of facts regarding
the model(s). Second, the results will be discussed in detail. Third,
we will cross-check the results with the hypotheses from
section~\ref{sec:motivation} and determine whether the aim of the work
has been met, and—if not—give reasons for the rejection of all or part
of the hypotheses.
Overall, the development of our application follows an evolutionary
prototyping process~\cite{davis1992,sears2007}. Instead of producing a
full-fledged product from the start, development happens iteratively
in phases. The main phases and their order for the prototype at hand
are: model selection, implementation, and evaluation. The results of
each phase—for example, which model has been selected—inform the
decisions which have to be made in the next phase (implementation). In
other words, every subsequent phase is dependent on the results of the
previous phase. All three phases, in turn, constitute one iteration
within the prototyping process. At the start of the next prototype,
the results of the previous iteration determine the path forward.
The decision to use an evolutionary prototyping process follows in
large part from the problem to be solved (as specified in
section~\ref{sec:motivation}). Since the critical requirements have
been established from the start, it is possible to build a solid
prototype from the beginning by implementing only those features which
are well-understood. The aim is to allow the developer to explore the
problem further so that additional requirements which arise during
development can be incorporated properly.
The prototyping process is embedded within the concepts of the
\emph{Scientific Method}. This thesis not only produces a prototype,
but also explores the problem of plant detection and classification
scientifically. Exploration of the problem requires making falsifiable
hypotheses (see section~\ref{sec:motivation}), gathering empirical
evidence (see section~\ref{sec:results}), and accepting or rejecting
the initial hypotheses (see section~\ref{sec:discussion}). Empirical
evidence is provided by measuring the model(s) against out-of-sample
test sets. This provides the necessary foundation for acceptance or
rejection of the hypotheses.
\section{Thesis Structure}
\label{sec:structure}
The first part of the thesis (chapter~\ref{chap:background}) contains
the theoretical basis of the models which we use for the
prototype. Chapter~\ref{chap:design} goes into detail about the design
of the prototype, the construction of the training/test sets and how
the prototype reports its results via its REST
API. Chapter~\ref{chap:evaluation} shows the results of the testing
phases as well as the performance of the aggregate model. Furthermore,
the results are compared with the expectations and it is discussed
whether they are explainable in the context of the task at hand as
well as benchmark results from other datasets
(COCO). Chapter~\ref{chap:conclusion} concludes the thesis with an
outlook on further research questions and possible improvements.
prototype. Chapter~\ref{chap:design} goes into detail about the
requirements for the prototype, the overall design and architecture of
the recognition and classification pipeline, and the structure and
unique properties of the selected
models. Chapter~\ref{chap:implementation} expands on how the datasets
are used during training as well as how the prototype publishes its
classification results. Chapter~\ref{chap:evaluation} shows the
results of the testing phases as well as the performance of the
aggregate model. Furthermore, the results are compared with the
expectations and it is discussed whether they are explainable in the
context of the task at hand as well as benchmark results from other
datasets (\gls{coco}~\cite{lin2015}). Chapter~\ref{chap:conclusion}
concludes the thesis with a summary and an outlook on possible
improvements and further research questions.
\chapter{Theoretical Background}
\label{chap:background}
@ -553,16 +637,17 @@ loop on the prototype in Figure~\ref{fig:setup}.
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{graphics/setup.pdf}
\caption{Methodological approach for the prototype. The prototype
will run in a loop which starts at the top left corner. First, the
camera attached to the prototype takes images of plants. These
images are passed to the models running on the prototype. The
first model generates bounding boxes for all detected plants. The
bounding boxes are used to cut out the individual plants and pass
them to the state classifier in sequence. The classifier outputs a
probability score indicating the amount of stress the plant is
experiencing. After a set amount of time, the camera takes a
picture again and the process continues indefinitely.}
\caption[Methodological approach for the prototype.]{Methodological
approach for the prototype. The prototype will run in a loop which
starts at the top left corner. First, the camera attached to the
prototype takes images of plants. These images are passed to the
models running on the prototype. The first model generates
bounding boxes for all detected plants. The bounding boxes are
used to cut out the individual plants and pass them to the state
classifier in sequence. The classifier outputs a probability score
indicating the amount of stress the plant is experiencing. After a
set amount of time, the camera takes a picture again and the
process continues indefinitely.}
\label{fig:setup}
\end{figure}
@ -1457,4 +1542,9 @@ Estimated 1 page for this section
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: