Add sections for theoretical background

This commit is contained in:
Tobias Eidelpes 2023-09-23 21:00:21 +02:00
parent 5917923de3
commit 99489d8d20
5 changed files with 336 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,62 @@
%% Creator: Inkscape 1.2.2 (b0a8486541, 2022-12-01), www.inkscape.org
%% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010
%% Accompanies image file 'neural-network.pdf' (pdf, eps, ps)
%%
%% To include the image in your LaTeX document, write
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics{<filename>.pdf}
%% To scale the image, write
%% \def\svgwidth{<desired width>}
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics[width=<desired width>]{<filename>.pdf}
%%
%% Images with a different path to the parent latex file can
%% be accessed with the `import' package (which may need to be
%% installed) using
%% \usepackage{import}
%% in the preamble, and then including the image with
%% \import{<path to file>}{<filename>.pdf_tex}
%% Alternatively, one can specify
%% \graphicspath{{<path to file>/}}
%%
%% For more information, please see info/svg-inkscape on CTAN:
%% http://tug.ctan.org/tex-archive/info/svg-inkscape
%%
\begingroup%
% NOTE(review): this file is auto-generated by Inkscape's "PDF + LaTeX"
% export (see header above). Do not hand-edit; re-export from the SVG
% instead. Trailing `%` on every line suppresses spurious spaces.
\makeatletter%
% Fallback definitions that raise an error if the required packages
% (color.sty / transparent.sty) are not loaded by the parent document.
\providecommand\color[2][]{%
\errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}%
\renewcommand\color[2][]{}%
}%
\providecommand\transparent[1]{%
\errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}%
\renewcommand\transparent[1]{}%
}%
\providecommand\rotatebox[2]{#2}%
\newcommand*\fsize{\dimexpr\f@size pt\relax}%
\newcommand*\lineheight[1]{\fontsize{\fsize}{#1\fsize}\selectfont}%
% Sizing: honour \svgwidth if the caller defined it; otherwise use the
% image's natural width, optionally scaled by \svgscale.
\ifx\svgwidth\undefined%
\setlength{\unitlength}{393.0933851bp}%
\ifx\svgscale\undefined%
\relax%
\else%
\setlength{\unitlength}{\unitlength * \real{\svgscale}}%
\fi%
\else%
\setlength{\unitlength}{\svgwidth}%
\fi%
% Reset the knobs so they do not leak into subsequent figures.
\global\let\svgwidth\undefined%
\global\let\svgscale\undefined%
\makeatother%
% Overlay the text labels (Input/Hidden/Output) at fixed fractional
% coordinates on top of the two pages of the exported PDF.
\begin{picture}(1,0.88588194)%
\lineheight{1}%
\setlength\tabcolsep{0pt}%
\put(0,0){\includegraphics[width=\unitlength,page=1]{neural-network.pdf}}%
\put(0.89542084,0.64235106){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Output\end{tabular}}}}%
\put(0.48992367,0.83949506){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Hidden\end{tabular}}}}%
\put(0,0){\includegraphics[width=\unitlength,page=2]{neural-network.pdf}}%
\put(0.09506923,0.73759468){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Input\end{tabular}}}}%
\end{picture}%
\endgroup%

View File

@ -404,6 +404,23 @@
file = {/home/zenon/Zotero/storage/3JZLQNJT/Mateo-Aroca et al. - 2019 - Remote Image Capture System to Improve Aerial Supe.pdf} file = {/home/zenon/Zotero/storage/3JZLQNJT/Mateo-Aroca et al. - 2019 - Remote Image Capture System to Improve Aerial Supe.pdf}
} }
@article{mcculloch1943,
title = {A Logical Calculus of the Ideas Immanent in Nervous Activity},
author = {McCulloch, Warren S. and Pitts, Walter},
date = {1943-12-01},
journaltitle = {The bulletin of mathematical biophysics},
shortjournal = {Bulletin of Mathematical Biophysics},
volume = {5},
number = {4},
pages = {115--133},
issn = {1522-9602},
doi = {10.1007/BF02478259},
urldate = {2023-09-22},
abstract = {Because of the “all-or-none” character of nervous activity, neural events and the relations among them can be treated by means of propositional logic. It is found that the behavior of every net can be described in these terms, with the addition of more complicated logical means for nets containing circles; and that for any logical expression satisfying certain conditions, one can find a net behaving in the fashion it describes. It is shown that many particular choices among possible neurophysiological assumptions are equivalent, in the sense that for every net behaving under one assumption, there exists another net which behaves under the other and gives the same results, although perhaps not in the same time. Various applications of the calculus are discussed.},
langid = {english},
keywords = {Excitatory Synapse,Inhibitory Synapse,Nervous Activity,Spatial Summation,Temporal Summation}
}
@article{mcenroe2022, @article{mcenroe2022,
title = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}: {{Opportunities}} and {{Challenges}}}, title = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}: {{Opportunities}} and {{Challenges}}},
shorttitle = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}}, shorttitle = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}},
@ -420,6 +437,18 @@
file = {/home/zenon/Zotero/storage/3ECY7VJ5/McEnroe et al. - 2022 - A Survey on the Convergence of Edge Computing and .pdf} file = {/home/zenon/Zotero/storage/3ECY7VJ5/McEnroe et al. - 2022 - A Survey on the Convergence of Edge Computing and .pdf}
} }
@book{mitchell1997a,
title = {Machine {{Learning}}},
author = {Mitchell, Thomas M.},
date = {1997-02},
edition = {1},
publisher = {{McGraw-Hill, Inc.}},
location = {{USA}},
abstract = {This exciting addition to the McGraw-Hill Series in Computer Science focuses on the concepts and techniques that contribute to the rapidly changing field of machine learning--including probability and statistics, artificial intelligence, and neural networks--unifying them all in a logical and coherent manner. Machine Learning serves as a useful reference tool for software developers and researchers, as well as an outstanding text for college students. Table of contents Chapter 1. Introduction Chapter 2. Concept Learning and the General-to-Specific Ordering Chapter 3. Decision Tree Learning Chapter 4. Artificial Neural Networks Chapter 5. Evaluating Hypotheses Chapter 6. Bayesian Learning Chapter 7. Computational Learning Theory Chapter 8. Instance-Based Learning Chapter 9. Inductive Logic Programming Chapter 10. Analytical Learning Chapter 11. Combining Inductive and Analytical Learning Chapter 12. Reinforcement Learning.},
isbn = {978-0-07-042807-2},
pagetotal = {432}
}
@article{nadafzadeh2019, @article{nadafzadeh2019,
title = {Design and {{Fabrication}} of an {{Intelligent Control System}} for {{Determination}} of {{Watering Time}} for {{Turfgrass Plant Using Computer Vision System}} and {{Artificial Neural Network}}}, title = {Design and {{Fabrication}} of an {{Intelligent Control System}} for {{Determination}} of {{Watering Time}} for {{Turfgrass Plant Using Computer Vision System}} and {{Artificial Neural Network}}},
author = {Nadafzadeh, Maryam and Abdanan Mehdizadeh, Saman}, author = {Nadafzadeh, Maryam and Abdanan Mehdizadeh, Saman},
@ -480,6 +509,45 @@
file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf} file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
} }
@report{rosenblatt1957,
type = {Technical Report},
title = {The Perceptron: {{A}} Perceiving and Recognizing Automaton},
author = {Rosenblatt, Frank},
date = {1957-01},
number = {85-460-1},
institution = {{Cornell Aeronautical Laboratory}},
location = {{Ithaca, NY}},
file = {/home/zenon/Zotero/storage/FA8NA2T6/Rosenblatt - 1957 - The perceptron A perceiving and recognizing autom.pdf}
}
@book{rosenblatt1962,
title = {Principles of {{Neurodynamics}}: {{Perceptrons}} and the {{Theory}} of {{Brain Mechanisms}}},
shorttitle = {Principles of {{Neurodynamics}}},
author = {Rosenblatt, Frank},
date = {1962},
eprint = {7FhRAAAAMAAJ},
eprinttype = {googlebooks},
publisher = {{Spartan Books}},
abstract = {Part I attempts to review the background, basic sources of data, concepts, and methodology to be employed in the study of perceptrons. In Chapter 2, a brief review of the main alternative approaches to the development of brain models is presented. Chapter 3 considers the physiological and psychological criteria for a suitable model, and attempts to evaluate the empirical evidence which is available on several important issues. Chapter 4 contains basic definitions and some of the notation to be used in later sections are presented. Parts II and III are devoted to a summary of the established theoretical results obtained to date. Part II (Chapters 5 through 14) deals with the theory of three-layer series-coupled perceptrons, on which most work has been done to date. Part III (Chapters 15 through 20) deals with the theory of multi-layer and cross-coupled perceptrons. Part IV is concerned with more speculative models and problems for future analysis. Of necessity, the final chapters become increasingly heuristic in character, as the theory of perceptrons is not yet complete, and new possibilities are continually coming to light. (Author).},
langid = {english},
pagetotal = {648}
}
@article{samuel2000,
title = {Some Studies in Machine Learning Using the Game of Checkers},
author = {Samuel, A. L.},
date = {2000-01},
journaltitle = {IBM Journal of Research and Development},
volume = {44},
number = {1.2},
pages = {206--226},
issn = {0018-8646},
doi = {10.1147/rd.441.0206},
abstract = {Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. The principles of machine learning verified by these experiments are, of course, applicable to many other situations.},
eventtitle = {{{IBM Journal}} of {{Research}} and {{Development}}},
file = {/home/zenon/Zotero/storage/CQD65S78/5389202.html}
}
@inproceedings{sears2007, @inproceedings{sears2007,
title = {Prototyping {{Tools}} and {{Techniques}}}, title = {Prototyping {{Tools}} and {{Techniques}}},
booktitle = {The {{Human-Computer Interaction Handbook}}}, booktitle = {The {{Human-Computer Interaction Handbook}}},

Binary file not shown.

View File

@ -97,6 +97,9 @@
\newacronym{sift}{SIFT}{Scale-Invariant Feature Transform} \newacronym{sift}{SIFT}{Scale-Invariant Feature Transform}
\newacronym{svm}{SVM}{Support Vector Machine} \newacronym{svm}{SVM}{Support Vector Machine}
\newacronym{dpm}{DPM}{Deformable Part-Based Model} \newacronym{dpm}{DPM}{Deformable Part-Based Model}
\newacronym{ai}{AI}{Artificial Intelligence}
\newacronym{mfcc}{MFCC}{Mel-frequency Cepstral Coefficient}
\newacronym{mlp}{MLP}{Multilayer Perceptron}
\begin{document} \begin{document}
@ -404,6 +407,205 @@ Describe the contents of this chapter.
Estimated 25 pages for this chapter. Estimated 25 pages for this chapter.
\section{Machine Learning}
\label{sec:theory-ml}
The term machine learning was first used by \textcite{samuel2000} in
1959 in the context of teaching a machine how to play the game
Checkers. \textcite{mitchell1997a} defines learning in the context of
programs as:
\begin{quote}
A computer program is said to \textbf{learn} from experience $E$
with respect to some class of tasks $T$ and performance measure $P$,
if its performance at tasks in $T$, as measured by $P$, improves
with experience $E$. \cite[p.~2]{mitchell1997a}
\end{quote}
In other words, if the aim is to learn to win at a game, the
performance measure $P$ is defined as the ability to win at that
game. The tasks in $T$ are playing the game multiple times, and the
experience $E$ is gained by letting the program play the game against
itself.
Machine learning is thought to be a sub-field of \gls{ai}. \gls{ai} is a
more general term for the scientific endeavour of creating things
which possess the kind of intelligence we humans have. Since those
things will not have been created \emph{naturally}, their intelligence
is termed \emph{artificial}. Within the field of \gls{ai} there have
been other approaches than what is commonly referred to as machine
learning today.
A major area of interest in the 1980s was the development of
\emph{expert systems}. These systems try to approach problem solving
as a rational decision-making process. Starting from a knowledge base,
which contains facts and rules about the world and the problem to be
solved, the expert system applies an inference engine to arrive at a
conclusion. An advantage of these systems is that they can often
explain how they came to a particular conclusion, allowing humans to
verify and judge the inference process. This kind of explainability is
missing in the neural network based approaches of today. However, an
expert system needs a significant base of facts and rules to be able
to do any meaningful inference. Outside of specialized domains such as
medical diagnosis, expert systems have always failed at commonsense
reasoning.
Machine learning can be broadly divided into two distinct approaches:
\emph{supervised} and \emph{unsupervised}. Supervised learning
describes a process where the algorithm receives input values as well
as their corresponding output values and tries to learn the function
which maps inputs to outputs. This is called supervised learning
because the model knows a target to map to. In unsupervised learning,
in contrast, algorithms do not have access to labeled data or output
values and therefore have to find patterns in the underlying
inputs. There can be mixed approaches as in \emph{semi-supervised}
learning where a model receives a small amount of labeled data as an
aid to better extract the patterns in the unlabeled data. Which type
of learning to apply depends heavily on the problem at hand. Tasks
such as image classification and speech recognition are good
candidates for supervised learning. If a model is required to
\emph{generate} speech, text or images, an unsupervised approach makes
more sense. We will go into detail about the general approach in
supervised learning because it is used throughout this thesis when
training the models.
\subsection{Supervised Learning}
\label{ssec:theory-sl}
The overall steps when training a model with labeled data are as
follows:
\begin{enumerate}
\item Determine which type of problem is to be solved and select
adequate training samples.
\item Gather enough training samples and obtain their corresponding
targets (labels). This stage usually requires humans to create a
body of ground truth with which the model can compare itself.
\item Select the type of representation of the inputs which is fed to
the model. The representation heavily relies on the amount of data
which the model can process in a reasonable amount of time. For
speech recognition, for example, raw waveforms are rarely fed to any
classifier. Instead, humans have to select a less granular and more
meaningful representation of the waveforms such as
\glspl{mfcc}. Selecting the representation to feed to the model is
also referred to as \emph{feature selection} or \emph{feature
engineering}.
\item Select the structure of the model or algorithm and the learning
function. Depending on the problem, possible choices are
\glspl{svm}, \glspl{cnn} and many more.
\item Train the model on the training set.
\item Validate the results on out-of-sample data by computing common
metrics and comparing the results to other approaches.
\item Optionally go back to step~4 to select different algorithms or to
train the model with different parameters or adjusted training
sets. Depending on the results, one can also employ computational
methods such as hyperparameter optimization to find a better
combination of model parameters.
\end{enumerate}
These steps are generally the same for every type of supervised or
semi-supervised machine learning approach. The implementation for
solving a particular problem differs depending on the type of problem,
how much data is available, how much can reasonably be labeled and any
other special requirements such as favoring speed over accuracy.
\subsection{Artificial Neural Networks}
\label{ssec:theory-nn}
Artificial neural networks are the building blocks of most
state-of-the-art models in use today. The computer sciences have
adopted the term from biology where it defines the complex structure
in the human brain which allows us to experience and interact with the
world around us. A neural network is necessarily composed of neurons
which act as gatekeepers for the signals they receive. Depending on
the inputs—electrochemical impulses, numbers, or other—the neuron
\emph{excites} and produces an output value if the right conditions
are met. This output value travels via connections to other neurons
and acts as an input on their side. Each neuron and connection between
the neurons has an associated weight which changes when the network
learns. The weights increase or decrease the signal from the
neuron. The neuron itself only passes a signal on to its output
connections if the conditions of its \emph{activation function} have
been met. This is typically a non-linear function. Multiple neurons
are usually grouped together to form a \emph{layer} within the
network. Multiple layers are stacked one after the other with
connections in-between to form a neural network. Layers between the
input and output layers are commonly referred to as \emph{hidden
layers}. Figure~\ref{fig:neural-network} shows the structure of a
three-layer fully-connected artificial neural network.
\begin{figure}
\centering
\def\svgwidth{\columnwidth}
\scalebox{0.75}{\input{graphics/neural-network.pdf_tex}}
\caption[Structure of an artificial neural network]{Structure of an
artificial neural network. Information travels from left to right
through the network using the neurons and the connections between
them. Attribution en:User:Cburnett, CC BY-SA 3.0 via Wikimedia
Commons.}
\label{fig:neural-network}
\end{figure}
The earliest attempts at describing learning machines were by
\textcite{mcculloch1943} with their formal model of the artificial
neuron. Building on this idea,
\textcite{rosenblatt1957,rosenblatt1962} developed the
\emph{perceptron} and implemented it as a physical machine. At its
core, the perceptron is the simplest artificial neural network with
only one neuron in the center. The neuron takes all its inputs,
aggregates them with a weighted sum and outputs 1 if the result is at
or above some threshold $\theta$ and 0 if it is not (see
equation~\ref{eq:perceptron}). This function is called the
\emph{activation function} of a perceptron. A perceptron is a type of
binary classifier which can only classify linearly separable
data.
\begin{equation}
  \label{eq:perceptron}
  y =
  \begin{cases}
    1 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i \geq \theta \\
    0 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i < \theta
  \end{cases}
\end{equation}
Because perceptrons are inherently limited to classifying linearly
separable data, \glspl{mlp} are the bedrock of modern artificial
neural networks. By adding an input layer, a hidden
layer, and an output layer as well as requiring the activation
function of each neuron to be non-linear, an \gls{mlp} can also
classify non-linearly separable data. Every neuron in each layer is
fully connected to
all of the neurons in the next layer and it is the most
straightforward case of a feedforward
network. Figure~\ref{fig:neural-network} shows the skeleton of an
\gls{mlp}.
There are two types of artificial neural networks: feedforward and
recurrent networks. Their names refer to the way information flows
through the network. In a feedforward network, the information enters
the network and flows only uni-directionally to the output nodes. In a
recurrent network, information can also feed back into previous
nodes. Which network is best used depends on the task at
hand. Recurrent networks are usually necessary when \emph{context} is
needed. For example, if the underlying data to classify is a time
series, individual data points have some relation to the previous and
next points in the series. Maintaining a bit of state is beneficial
because networks should be able to capture these
dependencies. However, having additional functionality for feeding
information back into previous neurons and layers comes with increased
complexity. A feedforward network, as depicted in
Figure~\ref{fig:neural-network}, represents a simpler structure.
\subsection{Activation Functions}
\label{ssec:theory-activation-functions}
\subsection{Loss Function}
\label{ssec:theory-loss-function}
\subsection{Backpropagation}
\label{ssec:theory-backpropagation}
\section{Object Detection} \section{Object Detection}
\label{sec:background-detection} \label{sec:background-detection}
@ -519,6 +721,7 @@ increases in depth. \textcite{girshick2015} argue that \glspl{dpm}
\glspl{cnn} by unrolling each step of the algorithm into a \glspl{cnn} by unrolling each step of the algorithm into a
corresponding \gls{cnn} layer. corresponding \gls{cnn} layer.
\section{Classification} \section{Classification}
\label{sec:background-classification} \label{sec:background-classification}
@ -1654,4 +1857,7 @@ Estimated 1 page for this section
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: %%% End: