Add sections for theoretical background

This commit is contained in:
Tobias Eidelpes 2023-09-23 21:00:21 +02:00
parent 5917923de3
commit 99489d8d20
5 changed files with 336 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,62 @@
%% Creator: Inkscape 1.2.2 (b0a8486541, 2022-12-01), www.inkscape.org
%% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010
%% Accompanies image file 'neural-network.pdf' (pdf, eps, ps)
%%
%% To include the image in your LaTeX document, write
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics{<filename>.pdf}
%% To scale the image, write
%% \def\svgwidth{<desired width>}
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics[width=<desired width>]{<filename>.pdf}
%%
%% Images with a different path to the parent latex file can
%% be accessed with the `import' package (which may need to be
%% installed) using
%% \usepackage{import}
%% in the preamble, and then including the image with
%% \import{<path to file>}{<filename>.pdf_tex}
%% Alternatively, one can specify
%% \graphicspath{{<path to file>/}}
%%
%% For more information, please see info/svg-inkscape on CTAN:
%% http://tug.ctan.org/tex-archive/info/svg-inkscape
%%
\begingroup%
% NOTE(review): this file is auto-generated by Inkscape's "PDF + LaTeX"
% export (see header above). Do not hand-edit; re-export from the SVG
% instead. Trailing `%` on every line suppresses spurious spaces.
\makeatletter%
% Fallback definitions that raise an error if the required packages
% (color.sty / transparent.sty) are not loaded by the parent document.
\providecommand\color[2][]{%
\errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}%
\renewcommand\color[2][]{}%
}%
\providecommand\transparent[1]{%
\errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}%
\renewcommand\transparent[1]{}%
}%
\providecommand\rotatebox[2]{#2}%
\newcommand*\fsize{\dimexpr\f@size pt\relax}%
\newcommand*\lineheight[1]{\fontsize{\fsize}{#1\fsize}\selectfont}%
% Sizing: honour \svgwidth if the caller defined it; otherwise use the
% image's natural width, optionally scaled by \svgscale.
\ifx\svgwidth\undefined%
\setlength{\unitlength}{393.0933851bp}%
\ifx\svgscale\undefined%
\relax%
\else%
\setlength{\unitlength}{\unitlength * \real{\svgscale}}%
\fi%
\else%
\setlength{\unitlength}{\svgwidth}%
\fi%
% Reset the knobs so they do not leak into subsequent figures.
\global\let\svgwidth\undefined%
\global\let\svgscale\undefined%
\makeatother%
% Overlay the text labels (Input/Hidden/Output) at fixed fractional
% coordinates on top of the two pages of the exported PDF.
\begin{picture}(1,0.88588194)%
\lineheight{1}%
\setlength\tabcolsep{0pt}%
\put(0,0){\includegraphics[width=\unitlength,page=1]{neural-network.pdf}}%
\put(0.89542084,0.64235106){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Output\end{tabular}}}}%
\put(0.48992367,0.83949506){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Hidden\end{tabular}}}}%
\put(0,0){\includegraphics[width=\unitlength,page=2]{neural-network.pdf}}%
\put(0.09506923,0.73759468){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Input\end{tabular}}}}%
\end{picture}%
\endgroup%

View File

@ -404,6 +404,23 @@
file = {/home/zenon/Zotero/storage/3JZLQNJT/Mateo-Aroca et al. - 2019 - Remote Image Capture System to Improve Aerial Supe.pdf} file = {/home/zenon/Zotero/storage/3JZLQNJT/Mateo-Aroca et al. - 2019 - Remote Image Capture System to Improve Aerial Supe.pdf}
} }
@article{mcculloch1943,
title = {A Logical Calculus of the Ideas Immanent in Nervous Activity},
author = {McCulloch, Warren S. and Pitts, Walter},
date = {1943-12-01},
journaltitle = {The bulletin of mathematical biophysics},
shortjournal = {Bulletin of Mathematical Biophysics},
volume = {5},
number = {4},
pages = {115--133},
issn = {1522-9602},
doi = {10.1007/BF02478259},
urldate = {2023-09-22},
abstract = {Because of the “all-or-none” character of nervous activity, neural events and the relations among them can be treated by means of propositional logic. It is found that the behavior of every net can be described in these terms, with the addition of more complicated logical means for nets containing circles; and that for any logical expression satisfying certain conditions, one can find a net behaving in the fashion it describes. It is shown that many particular choices among possible neurophysiological assumptions are equivalent, in the sense that for every net behaving under one assumption, there exists another net which behaves under the other and gives the same results, although perhaps not in the same time. Various applications of the calculus are discussed.},
langid = {english},
keywords = {Excitatory Synapse,Inhibitory Synapse,Nervous Activity,Spatial Summation,Temporal Summation}
}
@article{mcenroe2022, @article{mcenroe2022,
title = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}: {{Opportunities}} and {{Challenges}}}, title = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}: {{Opportunities}} and {{Challenges}}},
shorttitle = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}}, shorttitle = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}},
@ -420,6 +437,18 @@
file = {/home/zenon/Zotero/storage/3ECY7VJ5/McEnroe et al. - 2022 - A Survey on the Convergence of Edge Computing and .pdf} file = {/home/zenon/Zotero/storage/3ECY7VJ5/McEnroe et al. - 2022 - A Survey on the Convergence of Edge Computing and .pdf}
} }
@book{mitchell1997a,
title = {Machine {{Learning}}},
author = {Mitchell, Thomas M.},
date = {1997-02},
edition = {1},
publisher = {{McGraw-Hill, Inc.}},
location = {{USA}},
abstract = {This exciting addition to the McGraw-Hill Series in Computer Science focuses on the concepts and techniques that contribute to the rapidly changing field of machine learning--including probability and statistics, artificial intelligence, and neural networks--unifying them all in a logical and coherent manner. Machine Learning serves as a useful reference tool for software developers and researchers, as well as an outstanding text for college students. Table of contents Chapter 1. Introduction Chapter 2. Concept Learning and the General-to-Specific Ordering Chapter 3. Decision Tree Learning Chapter 4. Artificial Neural Networks Chapter 5. Evaluating Hypotheses Chapter 6. Bayesian Learning Chapter 7. Computational Learning Theory Chapter 8. Instance-Based Learning Chapter 9. Inductive Logic Programming Chapter 10. Analytical Learning Chapter 11. Combining Inductive and Analytical Learning Chapter 12. Reinforcement Learning.},
isbn = {978-0-07-042807-2},
pagetotal = {432}
}
@article{nadafzadeh2019, @article{nadafzadeh2019,
title = {Design and {{Fabrication}} of an {{Intelligent Control System}} for {{Determination}} of {{Watering Time}} for {{Turfgrass Plant Using Computer Vision System}} and {{Artificial Neural Network}}}, title = {Design and {{Fabrication}} of an {{Intelligent Control System}} for {{Determination}} of {{Watering Time}} for {{Turfgrass Plant Using Computer Vision System}} and {{Artificial Neural Network}}},
author = {Nadafzadeh, Maryam and Abdanan Mehdizadeh, Saman}, author = {Nadafzadeh, Maryam and Abdanan Mehdizadeh, Saman},
@ -480,6 +509,45 @@
file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf} file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
} }
@report{rosenblatt1957,
type = {Technical Report},
title = {The Perceptron: {{A}} Perceiving and Recognizing Automaton},
author = {Rosenblatt, Frank},
date = {1957-01},
number = {85-460-1},
institution = {{Cornell Aeronautical Laboratory}},
location = {{Ithaca, NY}},
file = {/home/zenon/Zotero/storage/FA8NA2T6/Rosenblatt - 1957 - The perceptron A perceiving and recognizing autom.pdf}
}
@book{rosenblatt1962,
title = {Principles of {{Neurodynamics}}: {{Perceptrons}} and the {{Theory}} of {{Brain Mechanisms}}},
shorttitle = {Principles of {{Neurodynamics}}},
author = {Rosenblatt, Frank},
date = {1962},
eprint = {7FhRAAAAMAAJ},
eprinttype = {googlebooks},
publisher = {{Spartan Books}},
abstract = {Part I attempts to review the background, basic sources of data, concepts, and methodology to be employed in the study of perceptrons. In Chapter 2, a brief review of the main alternative approaches to the development of brain models is presented. Chapter 3 considers the physiological and psychological criteria for a suitable model, and attempts to evaluate the empirical evidence which is available on several important issues. Chapter 4 contains basic definitions and some of the notation to be used in later sections are presented. Parts II and III are devoted to a summary of the established theoretical results obtained to date. Part II (Chapters 5 through 14) deals with the theory of three-layer series-coupled perceptrons, on which most work has been done to date. Part III (Chapters 15 through 20) deals with the theory of multi-layer and cross-coupled perceptrons. Part IV is concerned with more speculative models and problems for future analysis. Of necessity, the final chapters become increasingly heuristic in character, as the theory of perceptrons is not yet complete, and new possibilities are continually coming to light. (Author).},
langid = {english},
pagetotal = {648}
}
@article{samuel2000,
title = {Some Studies in Machine Learning Using the Game of Checkers},
author = {Samuel, A. L.},
date = {2000-01},
journaltitle = {IBM Journal of Research and Development},
volume = {44},
number = {1.2},
pages = {206--226},
issn = {0018-8646},
doi = {10.1147/rd.441.0206},
abstract = {Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. The principles of machine learning verified by these experiments are, of course, applicable to many other situations.},
eventtitle = {{{IBM Journal}} of {{Research}} and {{Development}}},
file = {/home/zenon/Zotero/storage/CQD65S78/5389202.html}
}
@inproceedings{sears2007, @inproceedings{sears2007,
title = {Prototyping {{Tools}} and {{Techniques}}}, title = {Prototyping {{Tools}} and {{Techniques}}},
booktitle = {The {{Human-Computer Interaction Handbook}}}, booktitle = {The {{Human-Computer Interaction Handbook}}},

Binary file not shown.

View File

@ -97,6 +97,9 @@
\newacronym{sift}{SIFT}{Scale-Invariant Feature Transform} \newacronym{sift}{SIFT}{Scale-Invariant Feature Transform}
\newacronym{svm}{SVM}{Support Vector Machine} \newacronym{svm}{SVM}{Support Vector Machine}
\newacronym{dpm}{DPM}{Deformable Part-Based Model} \newacronym{dpm}{DPM}{Deformable Part-Based Model}
\newacronym{ai}{AI}{Artificial Intelligence}
\newacronym{mfcc}{MFCC}{Mel-frequency Cepstral Coefficient}
\newacronym{mlp}{MLP}{Multilayer Perceptron}
\begin{document} \begin{document}
@ -404,6 +407,205 @@ Describe the contents of this chapter.
Estimated 25 pages for this chapter. Estimated 25 pages for this chapter.
\section{Machine Learning}
\label{sec:theory-ml}
The term machine learning was first used by \textcite{samuel2000} in
1959 in the context of teaching a machine how to play the game
Checkers. \textcite{mitchell1997a} defines learning in the context of
programs as:
\begin{quote}
A computer program is said to \textbf{learn} from experience $E$
with respect to some class of tasks $T$ and performance measure $P$,
if its performance at tasks in $T$, as measured by $P$, improves
with experience $E$. \cite[p.~2]{mitchell1997a}
\end{quote}
In other words, if the aim is to learn to win at a game, the
performance measure $P$ is defined as the ability to win at that
game. The tasks in $T$ are playing the game multiple times, and the
experience $E$ is gained by letting the program play the game against
itself.
Machine learning is thought to be a sub-field of \gls{ai}. \gls{ai} is a
more general term for the scientific endeavour of creating things
which possess the kind of intelligence we humans have. Since those
things will not have been created \emph{naturally}, their intelligence
is termed \emph{artificial}. Within the field of \gls{ai} there have
been other approaches than what is commonly referred to as machine
learning today.
A major area of interest in the 1980s was the development of
\emph{expert systems}. These systems try to approach problem solving
as a rational decision-making process. Starting from a knowledge base,
which contains facts and rules about the world and the problem to be
solved, the expert system applies an inference engine to arrive at a
conclusion. An advantage of these systems is that they can often
explain how they came to a particular conclusion, allowing humans to
verify and judge the inference process. This kind of explainability is
missing in the neural network based approaches of today. However, an
expert system needs a significant base of facts and rules to be able
to do any meaningful inference. Outside of specialized domains such as
medical diagnosis, expert systems have always failed at commonsense
reasoning.
Machine learning can be broadly divided into two distinct approaches:
\emph{supervised} and \emph{unsupervised}. Supervised learning
describes a process where the algorithm receives input values as well
as their corresponding output values and tries to learn the function
which maps inputs to outputs. This is called supervised learning
because the model knows a target to map to. In unsupervised learning,
in contrast, algorithms do not have access to labeled data or output
values and therefore have to find patterns in the underlying
inputs. There can be mixed approaches as in \emph{semi-supervised}
learning where a model receives a small amount of labeled data as an
aid to better extract the patterns in the unlabeled data. Which type
of learning to apply depends heavily on the problem at hand. Tasks
such as image classification and speech recognition are good
candidates for supervised learning. If a model is required to
\emph{generate} speech, text or images, an unsupervised approach makes
more sense. We will go into detail about the general approach in
supervised learning because it is used throughout this thesis when
training the models.
\subsection{Supervised Learning}
\label{ssec:theory-sl}
The overall steps when training a model with labeled data are as
follows:
\begin{enumerate}
\item Determine which type of problem is to be solved and select
adequate training samples.
\item Gather enough training samples and obtain their corresponding
targets (labels). This stage usually requires humans to create a
body of ground truth with which the model can compare itself.
\item Select the type of representation of the inputs which is fed to
the model. The representation heavily relies on the amount of data
which the model can process in a reasonable amount of time. For
speech recognition, for example, raw waveforms are rarely fed to any
classifier. Instead, humans have to select a less granular and more
meaningful representation of the waveforms such as
\glspl{mfcc}. Selecting the representation to feed to the model is
also referred to as \emph{feature selection} or \emph{feature
engineering}.
\item Select the structure of the model or algorithm and the learning
function. Depending on the problem, possible choices are
\glspl{svm}, \glspl{cnn} and many more.
\item Train the model on the training set.
\item Validate the results on out-of-sample data by computing common
metrics and comparing the results to other approaches.
\item Optionally go back to step~4 to select different algorithms or to
train the model with different parameters or adjusted training
sets. Depending on the results, one can also employ computational
methods such as hyperparameter optimization to find a better
combination of model parameters.
\end{enumerate}
These steps are generally the same for every type of supervised or
semi-supervised machine learning approach. The implementation for
solving a particular problem differs depending on the type of problem,
how much data is available, how much can reasonably be labeled and any
other special requirements such as favoring speed over accuracy.
\subsection{Artificial Neural Networks}
\label{ssec:theory-nn}
Artificial neural networks are the building blocks of most
state-of-the-art models in use today. The computer sciences have
adopted the term from biology where it defines the complex structure
in the human brain which allows us to experience and interact with the
world around us. A neural network is necessarily composed of neurons
which act as gatekeepers for the signals they receive. Depending on
the inputs—electrochemical impulses, numbers, or other—the neuron
\emph{excites} and produces an output value if the right conditions
are met. This output value travels via connections to other neurons
and acts as an input on their side. Each neuron and connection between
the neurons has an associated weight which changes when the network
learns. The weights increase or decrease the signal from the
neuron. The neuron itself only passes a signal on to its output
connections if the conditions of its \emph{activation function} have
been met. This is typically a non-linear function. Multiple neurons
are usually grouped together to form a \emph{layer} within the
network. Multiple layers are stacked one after the other with
connections in-between to form a neural network. Layers between the
input and output layers are commonly referred to as \emph{hidden
layers}. Figure~\ref{fig:neural-network} shows the structure of a
three-layer fully-connected artificial neural network.
\begin{figure}
\centering
\def\svgwidth{\columnwidth}
\scalebox{0.75}{\input{graphics/neural-network.pdf_tex}}
\caption[Structure of an artificial neural network]{Structure of an
artificial neural network. Information travels from left to right
through the network using the neurons and the connections between
them. Attribution en:User:Cburnett, CC BY-SA 3.0 via Wikimedia
Commons.}
\label{fig:neural-network}
\end{figure}
The earliest attempts at describing learning machines were by
\textcite{mcculloch1943} with their formal model of the artificial
neuron. Building on this idea,
\textcite{rosenblatt1957,rosenblatt1962} developed the
\emph{perceptron} and implemented it as a physical machine. At its
core, the perceptron is the simplest artificial neural network with
only one neuron in the center. The neuron takes all its inputs,
aggregates them with a weighted sum and outputs 1 if the result is at
or above some threshold $\theta$ and 0 if it is not (see
equation~\ref{eq:perceptron}). This function is called the
\emph{activation function} of a perceptron. A perceptron is a type of
binary classifier which can only classify linearly separable
data.
\begin{equation}
  \label{eq:perceptron}
  y =
  \begin{cases}
    1 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i \geq \theta \\
    0 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i < \theta
  \end{cases}
\end{equation}
Because perceptrons are inherently limited to classifying linearly
separable data, \glspl{mlp} are the bedrock of modern artificial
neural networks. By adding an input layer, a hidden
layer, and an output layer as well as requiring the activation
function of each neuron to be non-linear, an \gls{mlp} can also
classify non-linearly separable data. Every neuron in each layer is
fully connected to
all of the neurons in the next layer and it is the most
straightforward case of a feedforward
network. Figure~\ref{fig:neural-network} shows the skeleton of an
\gls{mlp}.
There are two types of artificial neural networks: feedforward and
recurrent networks. Their names refer to the way information flows
through the network. In a feedforward network, the information enters
the network and flows only uni-directionally to the output nodes. In a
recurrent network, information can also feed back into previous
nodes. Which network is best used depends on the task at
hand. Recurrent networks are usually necessary when \emph{context} is
needed. For example, if the underlying data to classify is a time
series, individual data points have some relation to the previous and
next points in the series. Maintaining a bit of state is beneficial
because networks should be able to capture these
dependencies. However, having additional functionality for feeding
information back into previous neurons and layers comes with increased
complexity. A feedforward network, as depicted in
Figure~\ref{fig:neural-network}, represents a simpler structure.
\subsection{Activation Functions}
\label{ssec:theory-activation-functions}
\subsection{Loss Function}
\label{ssec:theory-loss-function}
\subsection{Backpropagation}
\label{ssec:theory-backpropagation}
\section{Object Detection} \section{Object Detection}
\label{sec:background-detection} \label{sec:background-detection}
@ -519,6 +721,7 @@ increases in depth. \textcite{girshick2015} argue that \glspl{dpm}
\glspl{cnn} by unrolling each step of the algorithm into a \glspl{cnn} by unrolling each step of the algorithm into a
corresponding \gls{cnn} layer. corresponding \gls{cnn} layer.
\section{Classification} \section{Classification}
\label{sec:background-classification} \label{sec:background-classification}
@ -1654,4 +1857,7 @@ Estimated 1 page for this section
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t %%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: %%% End: