Finish Introduction

This commit is contained in:
Tobias Eidelpes 2023-09-20 08:36:11 +02:00
parent 2e7c669e1a
commit 94a01f81e5
3 changed files with 188 additions and 52 deletions

View File

@ -149,6 +149,23 @@
keywords = {Confusion matrix,Crop phenotyping,DCNN,Digital agriculture,Machine learning} keywords = {Confusion matrix,Crop phenotyping,DCNN,Digital agriculture,Machine learning}
} }
@article{davis1992,
  title        = {Operational Prototyping: A New Development Approach},
  shorttitle   = {Operational Prototyping},
  author       = {Davis, A. M.},
  date         = {1992-09},
  journaltitle = {IEEE Software},
  volume       = {9},
  number       = {5},
  pages        = {70--78},
  issn         = {1937-4194},
  doi          = {10.1109/52.156899},
  abstract     = {The two traditional types of software prototyping methods, throwaway prototyping and evolutionary prototyping, are compared, and prototyping's relation to conventional software development is discussed. Operational prototyping, a method that combines throwaway and evolutionary prototyping techniques by layering a rapid prototype over a solid evolutionary base, is described. Operational prototyping's implications for configuration management, quality assurance, and general project management are reviewed. The application of operational prototyping to a prototype ocean surveillance terminal is presented.},
  keywords     = {Application software,Oceans,Programming,Project management,Prototypes,Quality assurance,Quality management,Software prototyping,Solids,Surveillance},
  file         = {/home/zenon/Zotero/storage/7NBJW3VE/Davis - 1992 - Operational prototyping a new development approac.pdf;/home/zenon/Zotero/storage/N96N3CIA/156899.html}
}
@inproceedings{deng2009, @inproceedings{deng2009,
title = {{{ImageNet}}: {{A Large-Scale Hierarchical Image Database}}}, title = {{{ImageNet}}: {{A Large-Scale Hierarchical Image Database}}},
shorttitle = {{{ImageNet}}}, shorttitle = {{{ImageNet}}},
@ -162,6 +179,24 @@
keywords = {Explosions,Image databases,Image retrieval,Information retrieval,Internet,Large-scale systems,Multimedia databases,Ontologies,Robustness,Spine} keywords = {Explosions,Image databases,Image retrieval,Information retrieval,Internet,Large-scale systems,Multimedia databases,Ontologies,Robustness,Spine}
} }
@article{everingham2010,
  title        = {The {{Pascal Visual Object Classes}} ({{VOC}}) {{Challenge}}},
  author       = {Everingham, Mark and Van Gool, Luc and Williams, Christopher K. I. and Winn, John and Zisserman, Andrew},
  date         = {2010-06-01},
  journaltitle = {International Journal of Computer Vision},
  shortjournal = {Int J Comput Vis},
  volume       = {88},
  number       = {2},
  pages        = {303--338},
  issn         = {1573-1405},
  doi          = {10.1007/s11263-009-0275-4},
  urldate      = {2023-09-07},
  abstract     = {The Pascal Visual Object Classes (VOC) challenge is a benchmark in visual object category recognition and detection, providing the vision and machine learning communities with a standard dataset of images and annotation, and standard evaluation procedures. Organised annually from 2005 to present, the challenge and its associated dataset has become accepted as the benchmark for object detection.},
  langid       = {english},
  keywords     = {Benchmark,Database,Object detection,Object recognition},
  file         = {/home/zenon/Zotero/storage/FCRT6NYG/Everingham et al. - 2010 - The Pascal Visual Object Classes (VOC) Challenge.pdf}
}
@inproceedings{he2016, @inproceedings{he2016,
title = {Deep {{Residual Learning}} for {{Image Recognition}}}, title = {Deep {{Residual Learning}} for {{Image Recognition}}},
booktitle = {2016 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})}, booktitle = {2016 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}} ({{CVPR}})},
@ -211,7 +246,6 @@
pages = {1200977}, pages = {1200977},
issn = {2624-8212}, issn = {2624-8212},
doi = {10.3389/frai.2023.1200977}, doi = {10.3389/frai.2023.1200977},
url = {http://arxiv.org/abs/2211.02972},
urldate = {2023-08-25}, urldate = {2023-08-25},
abstract = {Machine learning tasks often require a significant amount of training data for the resultant network to perform suitably for a given problem in any domain. In agriculture, dataset sizes are further limited by phenotypical differences between two plants of the same genotype, often as a result of differing growing conditions. Synthetically-augmented datasets have shown promise in improving existing models when real data is not available. In this paper, we employ a contrastive unpaired translation (CUT) generative adversarial network (GAN) and simple image processing techniques to translate indoor plant images to appear as field images. While we train our network to translate an image containing only a single plant, we show that our method is easily extendable to produce multiple-plant field images. Furthermore, we use our synthetic multi-plant images to train several YoloV5 nano object detection models to perform the task of plant detection and measure the accuracy of the model on real field data images. Including training data generated by the CUT-GAN leads to better plant detection performance compared to a network trained solely on real data.}, abstract = {Machine learning tasks often require a significant amount of training data for the resultant network to perform suitably for a given problem in any domain. In agriculture, dataset sizes are further limited by phenotypical differences between two plants of the same genotype, often as a result of differing growing conditions. Synthetically-augmented datasets have shown promise in improving existing models when real data is not available. In this paper, we employ a contrastive unpaired translation (CUT) generative adversarial network (GAN) and simple image processing techniques to translate indoor plant images to appear as field images. While we train our network to translate an image containing only a single plant, we show that our method is easily extendable to produce multiple-plant field images. 
Furthermore, we use our synthetic multi-plant images to train several YoloV5 nano object detection models to perform the task of plant detection and measure the accuracy of the model on real field data images. Including training data generated by the CUT-GAN leads to better plant detection performance compared to a network trained solely on real data.},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning},
@ -260,7 +294,6 @@
eprintclass = {cs}, eprintclass = {cs},
pages = {21--37}, pages = {21--37},
doi = {10.1007/978-3-319-46448-0_2}, doi = {10.1007/978-3-319-46448-0_2},
url = {http://arxiv.org/abs/1512.02325},
urldate = {2023-08-24}, urldate = {2023-08-24},
abstract = {We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For \$300\textbackslash times 300\$ input, SSD achieves 72.1\% mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for \$500\textbackslash times 500\$ input, SSD achieves 75.1\% mAP, outperforming a comparable state of the art Faster R-CNN model. Code is available at https://github.com/weiliu89/caffe/tree/ssd .}, abstract = {We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. 
At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For \$300\textbackslash times 300\$ input, SSD achieves 72.1\% mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for \$500\textbackslash times 500\$ input, SSD achieves 75.1\% mAP, outperforming a comparable state of the art Faster R-CNN model. Code is available at https://github.com/weiliu89/caffe/tree/ssd .},
keywords = {Computer Science - Computer Vision and Pattern Recognition}, keywords = {Computer Science - Computer Vision and Pattern Recognition},
@ -367,13 +400,26 @@
pages = {970}, pages = {970},
issn = {2223-7747}, issn = {2223-7747},
doi = {10.3390/plants11070970}, doi = {10.3390/plants11070970},
url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9003083/},
urldate = {2023-08-25}, urldate = {2023-08-25},
abstract = {Plant stress is one of the most significant factors affecting plant fitness and, consequently, food production. However, plant stress may also be profitable since it behaves hormetically; at low doses, it stimulates positive traits in crops, such as the synthesis of specialized metabolites and additional stress tolerance. The controlled exposure of crops to low doses of stressors is therefore called hormesis management, and it is a promising method to increase crop productivity and quality. Nevertheless, hormesis management has severe limitations derived from the complexity of plant physiological responses to stress. Many technological advances assist plant stress science in overcoming such limitations, which results in extensive datasets originating from the multiple layers of the plant defensive response. For that reason, artificial intelligence tools, particularly Machine Learning (ML) and Deep Learning (DL), have become crucial for processing and interpreting data to accurately model plant stress responses such as genomic variation, gene and protein expression, and metabolite biosynthesis. In this review, we discuss the most recent ML and DL applications in plant stress science, focusing on their potential for improving the development of hormesis management protocols.}, abstract = {Plant stress is one of the most significant factors affecting plant fitness and, consequently, food production. However, plant stress may also be profitable since it behaves hormetically; at low doses, it stimulates positive traits in crops, such as the synthesis of specialized metabolites and additional stress tolerance. The controlled exposure of crops to low doses of stressors is therefore called hormesis management, and it is a promising method to increase crop productivity and quality. Nevertheless, hormesis management has severe limitations derived from the complexity of plant physiological responses to stress. 
Many technological advances assist plant stress science in overcoming such limitations, which results in extensive datasets originating from the multiple layers of the plant defensive response. For that reason, artificial intelligence tools, particularly Machine Learning (ML) and Deep Learning (DL), have become crucial for processing and interpreting data to accurately model plant stress responses such as genomic variation, gene and protein expression, and metabolite biosynthesis. In this review, we discuss the most recent ML and DL applications in plant stress science, focusing on their potential for improving the development of hormesis management protocols.},
pmcid = {PMC9003083}, pmcid = {PMC9003083},
file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf} file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
} }
@inproceedings{sears2007,
  title     = {Prototyping {{Tools}} and {{Techniques}}},
  booktitle = {The {{Human-Computer Interaction Handbook}}},
  editor    = {Sears, Andrew and Jacko, Julie A.},
  date      = {2007-09-19},
  pages     = {1043--1066},
  publisher = {CRC Press},
  doi       = {10.1201/9781410615862-66},
  urldate   = {2023-09-17},
  abstract  = {We begin with our definition of a prototype and then discuss prototypes as design artifacts, introducing four dimensions for analyzing them. We then discuss the role of prototyping within the design process, in particular the concept of a design space, and how it is expanded and contracted by generating and selecting design ideas. The next three sections describe specific prototyping approaches: Rapid prototyping, both off-line and on-line, for early stages of design, iterative prototyping, which uses on-line development tools, and evolutionary prototyping, which must be based on a sound software architecture.},
  isbn      = {978-0-429-16397-5},
  langid    = {english}
}
@article{selvaraju2020, @article{selvaraju2020,
title = {Grad-{{CAM}}: {{Visual Explanations}} from {{Deep Networks}} via {{Gradient-based Localization}}}, title = {Grad-{{CAM}}: {{Visual Explanations}} from {{Deep Networks}} via {{Gradient-based Localization}}},
shorttitle = {Grad-{{CAM}}}, shorttitle = {Grad-{{CAM}}},

Binary file not shown.

View File

@ -85,6 +85,11 @@
\newacronym{sgd}{SGD}{Stochastic Gradient Descent} \newacronym{sgd}{SGD}{Stochastic Gradient Descent}
\newacronym{roc}{ROC}{Receiver Operating Characteristic} \newacronym{roc}{ROC}{Receiver Operating Characteristic}
\newacronym{auc}{AUC}{Area Under the Curve} \newacronym{auc}{AUC}{Area Under the Curve}
\newacronym{coco}{COCO}{Common Objects in Context}
\newacronym{pascal-voc}{\textsc{PASCAL} VOC}{\textsc{PASCAL} Visual Object Classes}
\newacronym{sbc}{SBC}{single-board computer}
\newacronym{api}{API}{Application Programming Interface}
\newacronym{rest}{REST}{Representational State Transfer}
\begin{document} \begin{document}
@ -168,7 +173,9 @@ installation which is deployable by homeowners, gathers data using
readily available hardware and performs computation on the device readily available hardware and performs computation on the device
without a connection to a central server. The device should be able to without a connection to a central server. The device should be able to
visually determine whether the plants in its field of view need water visually determine whether the plants in its field of view need water
or not and output its recommendation. or not and output its recommendation. The recommendation should then
be used as a data point off of which homeowners can automatically
water their plants with an automated watering system.
The aim of this work is to develop a prototype which can be deployed The aim of this work is to develop a prototype which can be deployed
by gardeners to survey plants and recommend watering or not. To this by gardeners to survey plants and recommend watering or not. To this
@ -177,14 +184,13 @@ plants in the field of view and then to determine if the plants need
water or not. The model should be suitable for edge devices equipped water or not. The model should be suitable for edge devices equipped
with a TPU or GPU but with otherwise limited processing with a TPU or GPU but with otherwise limited processing
capabilities. Examples of such systems include Google's Coral capabilities. Examples of such systems include Google's Coral
development board and the Nvidia Jetson series of single-board development board and the Nvidia Jetson series of~\glspl{sbc}. The
computers (SBCs). The model should make use of state-of-the-art model should make use of state-of-the-art algorithms from either
algorithms from either classical machine learning or deep classical machine learning or deep learning. The literature review
learning. The literature review will yield an appropriate machine will yield an appropriate machine learning method. Furthermore, the
learning method. Furthermore, the adaptation of existing models adaptation of existing models (transfer learning) for object detection
(transfer learning) for object detection to the domain of plant to the domain of plant recognition may provide higher performance than
recognition may provide higher performance than would otherwise be would otherwise be achievable within the time constraints.
achievable within the time constraints.
The model will be deployed to the single-board computer and evaluated The model will be deployed to the single-board computer and evaluated
using established and well-known metrics from the field of machine using established and well-known metrics from the field of machine
@ -245,44 +251,122 @@ learning. The evaluation will seek to answer the following questions:
The methodological approach consists of the following steps: The methodological approach consists of the following steps:
\begin{description} \begin{enumerate}
\item[Literature Review] The literature review informs the type of \item \textbf{Literature Review}: The literature review informs the
machine learning methods which are later applied during the type of machine learning methods which are later applied during the
implementation of the prototype. implementation of the prototype.
\item[Object Detection] Flowers present in the image will be detected \item \textbf{Dataset Curation}: After selecting the methods to use
using object detection methods. These methods will draw bounding for the implementation, we have to create our own dataset or use
boxes around the objects of interest. The output is fed into the existing ones, depending on availability.
next stage. \item \textbf{Model Training}: The selected models will be trained
\item[State Classification] The bounded images will be fed to a with the datasets curated in the previous step.
classifier which will determine whether the plant needs water or \item \textbf{Optimization}: The selected models will be optimized
not. with respect to their parameters.
\item[Deployment to SBC] The software prototype will be deployed to \item \textbf{Deployment to SBC}: The software prototype will be
the single-board computer. deployed to the single-board computer.
\item[Evaluation] The prototype will be evaluated to determine its \item \textbf{Evaluation}: The models will be evaluated extensively
feasibility and performance. During evaluation the author seeks to and compared to other state-of-the-art systems. During evaluation,
provide a basis for answering the research questions. the author seeks to provide a basis for answering the research
\end{description} questions.
\end{enumerate}
Additionally, go into detail about how the literature was selected to During the literature review, the search is centered around the terms
be relevant for the decisions underlying the choice of \emph{plant classification}, \emph{plant state classification},
models/algorithms. Mention how literature in general was found (search \emph{plant detection}, \emph{water stress detection}, \emph{machine
terms, platforms, etc.). learning agriculture}, \emph{crop machine learning} and \emph{remote
sensing}. These terms provide a solid basis for understanding the
state of the art in plant detection and stress classification. We will
use multiple search engines such as Google Scholar, Semantic Scholar,
the ACM Digital Library, and IEEE Xplore. It is common to only publish
research papers in preprint form in the data science and machine
learning fields. For this reason, we will also reference arXiv.org for
these papers. The work discovered in this way will also lead to
further insights about the type of models which are commonly used.
In order to find and select appropriate datasets to train the models
on, we will survey the existing big datasets for classes we can
use. Datasets such as the \gls{coco}~\cite{lin2015} and
\gls{pascal-voc}~\cite{everingham2010} contain the highly relevant
class \emph{Potted Plant}. By extracting only these classes from
multiple datasets and concatenating them together, it is possible to
create one unified dataset which only contains the classes necessary
for training the model.
The training of the models will happen in an environment where more
computational resources are available than what the~\gls{sbc}
offers. We will deploy the final model with the~\gls{api} to
the~\gls{sbc} after training and optimization. Furthermore, training
will happen in tandem with a continuous evaluation process. After
every iteration of the model, an evaluation run against the test set
determines if there has been an improvement in performance. The
results of the evaluation feed back into the parameter selection at
the beginning of each training phase. Small changes to the training
parameters, augmentations or structure of the model are followed by
another test phase. The iterative nature of the development of the
prototype increases the likelihood that the model's performance is not
only locally maximal but also as close as possible to the global
maximum.
In the final evaluation phase, we will measure the resulting model
against the test set and evaluate its performance with common
metrics. The aim is to first provide a solid basis of facts regarding
the model(s). Second, the results will be discussed in detail. Third,
we will cross-check the results with the hypotheses from
section~\ref{sec:motivation} and determine whether the aim of the work
has been met, and—if not—give reasons for the rejection of all or part
of the hypotheses.
Overall, the development of our application follows an evolutionary
prototyping process~\cite{davis1992,sears2007}. Instead of producing a
full-fledged product from the start, development happens iteratively
in phases. The main phases and their order for the prototype at hand
are: model selection, implementation, and evaluation. The results of
each phase—for example, which model has been selected—inform the
decisions which have to be made in the next phase (implementation). In
other words, every subsequent phase is dependent on the results of the
previous phase. All three phases, in turn, constitute one iteration
within the prototyping process. At the start of the next prototype,
the results of the previous iteration determine the path forward.
The decision to use an evolutionary prototyping process follows in
large part from the problem to be solved (as specified in
section~\ref{sec:motivation}). Since the critical requirements have
been established from the start, it is possible to build a solid
prototype from the beginning by implementing only those features which
are well-understood. The aim is to allow the developer to explore the
problem further so that additional requirements which arise during
development can be incorporated properly.
The prototyping process is embedded within the concepts of the
\emph{Scientific Method}. This thesis not only produces a prototype,
but also explores the problem of plant detection and classification
scientifically. Exploration of the problem requires making falsifiable
hypotheses (see section~\ref{sec:motivation}), gathering empirical
evidence (see section~\ref{sec:results}), and accepting or rejecting
the initial hypotheses (see section~\ref{sec:discussion}). Empirical
evidence is provided by measuring the model(s) against out-of-sample
test sets. This provides the necessary foundation for acceptance or
rejection of the hypotheses.
\section{Thesis Structure} \section{Thesis Structure}
\label{sec:structure} \label{sec:structure}
The first part of the thesis (chapter~\ref{chap:background}) contains The first part of the thesis (chapter~\ref{chap:background}) contains
the theoretical basis of the models which we use for the the theoretical basis of the models which we use for the
prototype. Chapter~\ref{chap:design} goes into detail about the design prototype. Chapter~\ref{chap:design} goes into detail about the
of the prototype, the construction of the training/test sets and how requirements for the prototype, the overall design and architecture of
the prototype reports its results via its REST the recognition and classification pipeline, and the structure and
API. Chapter~\ref{chap:evaluation} shows the results of the testing unique properties of the selected
phases as well as the performance of the aggregate model. Furthermore, models. Chapter~\ref{chap:implementation} expands on how the datasets
the results are compared with the expectations and it is discussed are used during training as well as how the prototype publishes its
whether they are explainable in the context of the task at hand as classification results. Chapter~\ref{chap:evaluation} shows the
well as benchmark results from other datasets results of the testing phases as well as the performance of the
(COCO). Chapter~\ref{chap:conclusion} concludes the thesis with an aggregate model. Furthermore, the results are compared with the
outlook on further research questions and possible improvements. expectations and it is discussed whether they are explainable in the
context of the task at hand as well as benchmark results from other
datasets (\gls{coco}~\cite{lin2015}). Chapter~\ref{chap:conclusion}
concludes the thesis with a summary and an outlook on possible
improvements and further research questions.
\chapter{Theoretical Background} \chapter{Theoretical Background}
\label{chap:background} \label{chap:background}
@ -553,16 +637,17 @@ loop on the prototype in Figure~\ref{fig:setup}.
\begin{figure} \begin{figure}
\centering \centering
\includegraphics[width=0.8\textwidth]{graphics/setup.pdf} \includegraphics[width=0.8\textwidth]{graphics/setup.pdf}
\caption{Methodological approach for the prototype. The prototype \caption[Methodological approach for the prototype.]{Methodological
will run in a loop which starts at the top left corner. First, the approach for the prototype. The prototype will run in a loop which
camera attached to the prototype takes images of plants. These starts at the top left corner. First, the camera attached to the
images are passed to the models running on the prototype. The prototype takes images of plants. These images are passed to the
first model generates bounding boxes for all detected plants. The models running on the prototype. The first model generates
bounding boxes are used to cut out the individual plants and pass bounding boxes for all detected plants. The bounding boxes are
them to the state classifier in sequence. The classifier outputs a used to cut out the individual plants and pass them to the state
probability score indicating the amount of stress the plant is classifier in sequence. The classifier outputs a probability score
experiencing. After a set amount of time, the camera takes a indicating the amount of stress the plant is experiencing. After a
picture again and the process continues indefinitely.} set amount of time, the camera takes a picture again and the
process continues indefinitely.}
\label{fig:setup} \label{fig:setup}
\end{figure} \end{figure}
@ -1457,4 +1542,9 @@ Estimated 1 page for this section
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: %%% End: