Add sections for theoretical background
This commit is contained in:
parent 5917923de3
commit 99489d8d20
BIN
thesis/graphics/neural-network.pdf
Normal file
Binary file not shown.
62
thesis/graphics/neural-network.pdf_tex
Normal file
@@ -0,0 +1,62 @@
%% Creator: Inkscape 1.2.2 (b0a8486541, 2022-12-01), www.inkscape.org
%% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010
%% Accompanies image file 'neural-network.pdf' (pdf, eps, ps)
%%
%% To include the image in your LaTeX document, write
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics{<filename>.pdf}
%% To scale the image, write
%% \def\svgwidth{<desired width>}
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics[width=<desired width>]{<filename>.pdf}
%%
%% Images with a different path to the parent latex file can
%% be accessed with the `import' package (which may need to be
%% installed) using
%% \usepackage{import}
%% in the preamble, and then including the image with
%% \import{<path to file>}{<filename>.pdf_tex}
%% Alternatively, one can specify
%% \graphicspath{{<path to file>/}}
%%
%% For more information, please see info/svg-inkscape on CTAN:
%% http://tug.ctan.org/tex-archive/info/svg-inkscape
%%
\begingroup%
\makeatletter%
\providecommand\color[2][]{%
\errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}%
\renewcommand\color[2][]{}%
}%
\providecommand\transparent[1]{%
\errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}%
\renewcommand\transparent[1]{}%
}%
\providecommand\rotatebox[2]{#2}%
\newcommand*\fsize{\dimexpr\f@size pt\relax}%
\newcommand*\lineheight[1]{\fontsize{\fsize}{#1\fsize}\selectfont}%
\ifx\svgwidth\undefined%
\setlength{\unitlength}{393.0933851bp}%
\ifx\svgscale\undefined%
\relax%
\else%
\setlength{\unitlength}{\unitlength * \real{\svgscale}}%
\fi%
\else%
\setlength{\unitlength}{\svgwidth}%
\fi%
\global\let\svgwidth\undefined%
\global\let\svgscale\undefined%
\makeatother%
\begin{picture}(1,0.88588194)%
\lineheight{1}%
\setlength\tabcolsep{0pt}%
\put(0,0){\includegraphics[width=\unitlength,page=1]{neural-network.pdf}}%
\put(0.89542084,0.64235106){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Output\end{tabular}}}}%
\put(0.48992367,0.83949506){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Hidden\end{tabular}}}}%
\put(0,0){\includegraphics[width=\unitlength,page=2]{neural-network.pdf}}%
\put(0.09506923,0.73759468){\color[rgb]{0,0,0}\makebox(0,0)[t]{\lineheight{0}\smash{\begin{tabular}[t]{c}Input\end{tabular}}}}%
\end{picture}%
\endgroup%
@@ -404,6 +404,23 @@
  file = {/home/zenon/Zotero/storage/3JZLQNJT/Mateo-Aroca et al. - 2019 - Remote Image Capture System to Improve Aerial Supe.pdf}
}

@article{mcculloch1943,
  title = {A Logical Calculus of the Ideas Immanent in Nervous Activity},
  author = {McCulloch, Warren S. and Pitts, Walter},
  date = {1943-12-01},
  journaltitle = {The bulletin of mathematical biophysics},
  shortjournal = {Bulletin of Mathematical Biophysics},
  volume = {5},
  number = {4},
  pages = {115--133},
  issn = {1522-9602},
  doi = {10.1007/BF02478259},
  urldate = {2023-09-22},
  abstract = {Because of the “all-or-none” character of nervous activity, neural events and the relations among them can be treated by means of propositional logic. It is found that the behavior of every net can be described in these terms, with the addition of more complicated logical means for nets containing circles; and that for any logical expression satisfying certain conditions, one can find a net behaving in the fashion it describes. It is shown that many particular choices among possible neurophysiological assumptions are equivalent, in the sense that for every net behaving under one assumption, there exists another net which behaves under the other and gives the same results, although perhaps not in the same time. Various applications of the calculus are discussed.},
  langid = {english},
  keywords = {Excitatory Synapse,Inhibitory Synapse,Nervous Activity,Spatial Summation,Temporal Summation}
}

@article{mcenroe2022,
  title = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}: {{Opportunities}} and {{Challenges}}},
  shorttitle = {A {{Survey}} on the {{Convergence}} of {{Edge Computing}} and {{AI}} for {{UAVs}}},
@@ -420,6 +437,18 @@
  file = {/home/zenon/Zotero/storage/3ECY7VJ5/McEnroe et al. - 2022 - A Survey on the Convergence of Edge Computing and .pdf}
}

@book{mitchell1997a,
  title = {Machine {{Learning}}},
  author = {Mitchell, Thomas M.},
  date = {1997-02},
  edition = {1},
  publisher = {{McGraw-Hill, Inc.}},
  location = {{USA}},
  abstract = {This exciting addition to the McGraw-Hill Series in Computer Science focuses on the concepts and techniques that contribute to the rapidly changing field of machine learning--including probability and statistics, artificial intelligence, and neural networks--unifying them all in a logical and coherent manner. Machine Learning serves as a useful reference tool for software developers and researchers, as well as an outstanding text for college students. Table of contents Chapter 1. Introduction Chapter 2. Concept Learning and the General-to-Specific Ordering Chapter 3. Decision Tree Learning Chapter 4. Artificial Neural Networks Chapter 5. Evaluating Hypotheses Chapter 6. Bayesian Learning Chapter 7. Computational Learning Theory Chapter 8. Instance-Based Learning Chapter 9. Inductive Logic Programming Chapter 10. Analytical Learning Chapter 11. Combining Inductive and Analytical Learning Chapter 12. Reinforcement Learning.},
  isbn = {978-0-07-042807-2},
  pagetotal = {432}
}

@article{nadafzadeh2019,
  title = {Design and {{Fabrication}} of an {{Intelligent Control System}} for {{Determination}} of {{Watering Time}} for {{Turfgrass Plant Using Computer Vision System}} and {{Artificial Neural Network}}},
  author = {Nadafzadeh, Maryam and Abdanan Mehdizadeh, Saman},
@@ -480,6 +509,45 @@
  file = {/home/zenon/Zotero/storage/56I7ELHW/Rico-Chávez et al. - 2022 - Machine Learning for Plant Stress Modeling A Pers.pdf}
}

@report{rosenblatt1957,
  type = {Technical Report},
  title = {The Perceptron: {{A}} Perceiving and Recognizing Automaton},
  author = {Rosenblatt, Frank},
  date = {1957-01},
  number = {85-460-1},
  institution = {{Cornell Aeronautical Laboratory}},
  location = {{Ithaca, NY}},
  file = {/home/zenon/Zotero/storage/FA8NA2T6/Rosenblatt - 1957 - The perceptron A perceiving and recognizing autom.pdf}
}

@book{rosenblatt1962,
  title = {Principles of {{Neurodynamics}}: {{Perceptrons}} and the {{Theory}} of {{Brain Mechanisms}}},
  shorttitle = {Principles of {{Neurodynamics}}},
  author = {Rosenblatt, Frank},
  date = {1962},
  eprint = {7FhRAAAAMAAJ},
  eprinttype = {googlebooks},
  publisher = {{Spartan Books}},
  abstract = {Part I attempts to review the background, basic sources of data, concepts, and methodology to be employed in the study of perceptrons. In Chapter 2, a brief review of the main alternative approaches to the development of brain models is presented. Chapter 3 considers the physiological and psychological criteria for a suitable model, and attempts to evaluate the empirical evidence which is available on several important issues. Chapter 4 contains basic definitions and some of the notation to be used in later sections are presented. Parts II and III are devoted to a summary of the established theoretical results obtained to date. Part II (Chapters 5 through 14) deals with the theory of three-layer series-coupled perceptrons, on which most work has been done to date. Part III (Chapters 15 through 20) deals with the theory of multi-layer and cross-coupled perceptrons. Part IV is concerned with more speculative models and problems for future analysis. Of necessity, the final chapters become increasingly heuristic in character, as the theory of perceptrons is not yet complete, and new possibilities are continually coming to light. (Author).},
  langid = {english},
  pagetotal = {648}
}

@article{samuel2000,
  title = {Some Studies in Machine Learning Using the Game of Checkers},
  author = {Samuel, A. L.},
  date = {2000-01},
  journaltitle = {IBM Journal of Research and Development},
  volume = {44},
  number = {1.2},
  pages = {206--226},
  issn = {0018-8646},
  doi = {10.1147/rd.441.0206},
  abstract = {Two machine-learning procedures have been investigated in some detail using the game of checkers. Enough work has been done to verify the fact that a computer can be programmed so that it will learn to play a better game of checkers than can be played by the person who wrote the program. Furthermore, it can learn to do this in a remarkably short period of time (8 or 10 hours of machine-playing time) when given only the rules of the game, a sense of direction, and a redundant and incomplete list of parameters which are thought to have something to do with the game, but whose correct signs and relative weights are unknown and unspecified. The principles of machine learning verified by these experiments are, of course, applicable to many other situations.},
  eventtitle = {{{IBM Journal}} of {{Research}} and {{Development}}},
  file = {/home/zenon/Zotero/storage/CQD65S78/5389202.html}
}

@inproceedings{sears2007,
  title = {Prototyping {{Tools}} and {{Techniques}}},
  booktitle = {The {{Human-Computer Interaction Handbook}}},
Binary file not shown.
@@ -97,6 +97,9 @@
\newacronym{sift}{SIFT}{Scale-Invariant Feature Transform}
\newacronym{svm}{SVM}{Support Vector Machine}
\newacronym{dpm}{DPM}{Deformable Part-Based Model}
\newacronym{ai}{AI}{Artificial Intelligence}
\newacronym{mfcc}{MFCC}{Mel-frequency Cepstral Coefficient}
\newacronym{mlp}{MLP}{Multilayer Perceptron}

\begin{document}

@@ -404,6 +407,205 @@ Describe the contents of this chapter.

Estimated 25 pages for this chapter.

\section{Machine Learning}
\label{sec:theory-ml}

The term machine learning was first used by \textcite{samuel2000} in
1959 in the context of teaching a machine how to play the game of
checkers. \textcite{mitchell1997a} defines learning in the context of
programs as:
\begin{quote}
A computer program is said to \textbf{learn} from experience $E$
with respect to some class of tasks $T$ and performance measure $P$,
if its performance at tasks in $T$, as measured by $P$, improves
with experience $E$. \cite[p.~2]{mitchell1997a}
\end{quote}
In other words, if the aim is to learn to win at a game, the
performance measure $P$ is defined as the ability to win at that
game. The tasks in $T$ are playing the game multiple times, and the
experience $E$ is gained by letting the program play the game against
itself.

Machine learning is considered a sub-field of \gls{ai}. \gls{ai} is a
more general term for the scientific endeavour of creating things
which possess the kind of intelligence we humans have. Since those
things will not have been created \emph{naturally}, their intelligence
is termed \emph{artificial}. Within the field of \gls{ai}, there have
been approaches other than what is commonly referred to as machine
learning today.

A major area of interest in the 1980s was the development of
\emph{expert systems}. These systems approach problem solving as a
rational decision-making process. Starting from a knowledge base,
which contains facts and rules about the world and the problem to be
solved, an expert system applies an inference engine to arrive at a
conclusion. An advantage of these systems is that they can often
explain how they came to a particular conclusion, allowing humans to
verify and judge the inference process. This kind of explainability is
missing in the neural-network-based approaches of today. However, an
expert system needs a significant base of facts and rules to be able
to do any meaningful inference. Outside of specialized domains such as
medical diagnosis, expert systems have largely failed at commonsense
reasoning.

Machine learning can be broadly divided into two distinct approaches:
\emph{supervised} and \emph{unsupervised} learning. Supervised
learning describes a process where the algorithm receives input values
as well as their corresponding output values and tries to learn the
function which maps inputs to outputs. It is called supervised
learning because the model knows the targets it should map to. In
unsupervised learning, in contrast, algorithms do not have access to
labeled data or output values and therefore have to find patterns in
the underlying inputs on their own. There are also mixed approaches,
such as \emph{semi-supervised} learning, where a model receives a
small amount of labeled data as an aid to better extract the patterns
in the unlabeled data. Which type of learning to apply depends heavily
on the problem at hand. Tasks such as image classification and speech
recognition are good candidates for supervised learning. If a model is
required to \emph{generate} speech, text or images, an unsupervised
approach makes more sense. We describe the general approach of
supervised learning in more detail because it is used throughout this
thesis when training the models.

\subsection{Supervised Learning}
\label{ssec:theory-sl}

The overall steps when training a model with labeled data are as
follows:

\begin{enumerate}
\item Determine which type of problem is to be solved and select
  adequate training samples.
\item Gather enough training samples and obtain their corresponding
  targets (labels). This stage usually requires humans to create a
  body of ground truth against which the model can compare itself.
\item Select the representation of the inputs which is fed to the
  model. The choice of representation depends heavily on how much data
  the model can process in a reasonable amount of time. In speech
  recognition, for example, raw waveforms are rarely fed to a
  classifier. Instead, humans select a less granular and more
  meaningful representation of the waveforms such as
  \glspl{mfcc}. Selecting the representation to feed to the model is
  also referred to as \emph{feature selection} or \emph{feature
    engineering}.
\item Select the structure of the model or algorithm and the learning
  function. Depending on the problem, possible choices are
  \glspl{svm}, \glspl{cnn} and many more.
\item Train the model on the training set.
\item Validate the results on out-of-sample data by computing common
  metrics and comparing the results to other approaches.
\item Optionally, go back to step~4 to select a different algorithm or
  to retrain the model with different parameters or an adjusted
  training set. Depending on the results, one can also employ
  computational methods such as hyperparameter optimization to find a
  better combination of model parameters.
\end{enumerate}

These steps are generally the same for every type of supervised or
semi-supervised machine learning approach. The implementation for
solving a particular problem differs depending on the type of problem,
how much data is available, how much of it can reasonably be labeled,
and any other special requirements such as favoring speed over
accuracy.
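
To make steps 4 to 6 more concrete, the listing below sketches a
minimal supervised training run. It is an illustrative sketch only and
not the pipeline used in this thesis: it assumes the
\texttt{scikit-learn} library and substitutes a synthetic data set for
real training samples.

\begin{verbatim}
# Minimal sketch of steps 4-6 (illustrative only; assumes scikit-learn).
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Stand-in for step 2: synthetic, already labeled samples.
X, y = make_classification(n_samples=500, n_features=20, random_state=0)

# Hold out a quarter of the data for out-of-sample validation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

model = SVC(kernel="rbf", C=1.0)    # step 4: choose a model structure
model.fit(X_train, y_train)         # step 5: train on the training set
y_pred = model.predict(X_test)      # step 6: validate out-of-sample
print(accuracy_score(y_test, y_pred))
\end{verbatim}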

\subsection{Artificial Neural Networks}
\label{ssec:theory-nn}

Artificial neural networks are the building blocks of most
state-of-the-art models in use today. Computer science has adopted the
term from biology, where it describes the complex structure in the
human brain which allows us to experience and interact with the world
around us. A neural network is composed of neurons which act as
gatekeepers for the signals they receive. Depending on its inputs
(electrochemical impulses in the brain, numbers in the artificial
counterpart), a neuron \emph{excites} and produces an output value if
the right conditions are met. This output value travels via
connections to other neurons and acts as an input on their side. Each
connection between neurons has an associated weight which changes as
the network learns; the weights strengthen or weaken the signal
travelling along the connection. The neuron itself only passes a
signal on to its output connections if the conditions of its
\emph{activation function}, typically a non-linear function, have been
met. Multiple neurons are usually grouped together to form a
\emph{layer} within the network, and multiple layers are stacked one
after the other with connections in-between to form a neural
network. Layers between the input and output layers are commonly
referred to as \emph{hidden layers}. Figure~\ref{fig:neural-network}
shows the structure of a three-layer fully-connected artificial neural
network.

\begin{figure}
  \centering
  \def\svgwidth{\columnwidth}
  \scalebox{0.75}{\input{graphics/neural-network.pdf_tex}}
  \caption[Structure of an artificial neural network]{Structure of an
    artificial neural network. Information travels from left to right
    through the network along the neurons and the connections between
    them. Attribution: en:User:Cburnett, CC~BY-SA~3.0, via Wikimedia
    Commons.}
  \label{fig:neural-network}
\end{figure}
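
Formally, a single neuron in such a network computes a weighted sum of
its inputs and passes the result through its activation function. In
one common textbook formulation, which additionally includes a bias
term $b$ that is not discussed above, a neuron with inputs
$x_1, \dots, x_n$ and weights $w_1, \dots, w_n$ produces the output
\begin{equation}
  y = \sigma\left(\sum_{i=1}^{n} w_i x_i + b\right),
\end{equation}
where $\sigma$ denotes the activation function.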

The earliest attempts at describing learning machines go back to
\textcite{mcculloch1943} and their logical model of the artificial
neuron. \textcite{rosenblatt1957,rosenblatt1962} generalized this idea
into the \emph{perceptron} and implemented it as a physical
machine. At its core, the perceptron is the simplest artificial neural
network, consisting of a single neuron. The neuron takes all its
inputs, aggregates them with a weighted sum and outputs 1 if the
result is at least some threshold $\theta$ and 0 otherwise (see
Equation~\ref{eq:perceptron}). This function is called the
\emph{activation function} of the perceptron. A perceptron is a type
of binary classifier which can only classify linearly separable data.

\begin{equation}
  \label{eq:perceptron}
  y =
  \begin{cases}
    1 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i \geq \theta \\
    0 & \text{if } \sum_{i=1}^{n} w_i \cdot x_i < \theta
  \end{cases}
\end{equation}
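
As an illustration of Equation~\ref{eq:perceptron}, the following
listing applies the perceptron decision rule to a linearly separable
toy problem, the logical AND of two binary inputs. The sketch is
purely illustrative: the weights and the threshold are picked by hand
rather than learned, and the code assumes Python with NumPy.

\begin{verbatim}
# Perceptron decision rule from the equation above (illustrative only).
import numpy as np

def perceptron(x, w, theta):
    """Return 1 if the weighted sum of the inputs reaches the threshold."""
    return 1 if np.dot(w, x) >= theta else 0

# Hand-picked weights and threshold that realise logical AND,
# a linearly separable function of two binary inputs.
w = np.array([1.0, 1.0])
theta = 1.5

for x in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(x, perceptron(np.array(x), w, theta))
# Prints 0, 0, 0, 1: only the input (1, 1) reaches the threshold.
\end{verbatim}

Learning then amounts to adjusting the weights and the threshold from
labeled examples instead of choosing them by hand.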

Because a single perceptron can only classify linearly separable data,
\glspl{mlp} form the bedrock of modern artificial neural networks. By
combining an input layer, one or more hidden layers, and an output
layer, and by requiring the activation function of each neuron to be
non-linear, an \gls{mlp} can also classify data that is not linearly
separable. Every neuron in one layer is fully connected to all neurons
in the next layer, which makes the \gls{mlp} the most straightforward
case of a feedforward network. Figure~\ref{fig:neural-network} shows
the skeleton of an \gls{mlp}.
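
To make the structure explicit, the forward pass of an \gls{mlp} with
a single hidden layer can be written in one common textbook
formulation as
\begin{align}
  \mathbf{h} &= \sigma\left(W^{(1)} \mathbf{x} + \mathbf{b}^{(1)}\right), \\
  \mathbf{y} &= \sigma\left(W^{(2)} \mathbf{h} + \mathbf{b}^{(2)}\right),
\end{align}
where $\mathbf{x}$ is the input vector, $W^{(1)}$ and $W^{(2)}$ are
the weight matrices of the two connection stages, $\mathbf{b}^{(1)}$
and $\mathbf{b}^{(2)}$ are bias vectors, and $\sigma$ is a non-linear
activation function applied element-wise. The notation is generic and
not tied to the cited sources; in practice the output layer often uses
a different activation function than the hidden layer.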

There are two broad types of artificial neural networks: feedforward
and recurrent networks. Their names refer to the way information flows
through the network. In a feedforward network, the information enters
the network and flows uni-directionally towards the output nodes. In a
recurrent network, information can also feed back into previous
nodes. Which type of network is best suited depends on the task at
hand. Recurrent networks are usually necessary when \emph{context} is
needed. For example, if the underlying data to classify is a time
series, individual data points have some relation to the previous and
next points in the series. Maintaining internal state is beneficial
because it allows the network to capture these dependencies. However,
the additional machinery for feeding information back into previous
neurons and layers comes with increased complexity. A feedforward
network, as depicted in Figure~\ref{fig:neural-network}, represents a
simpler structure.
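
The difference can be made concrete with the state update of a simple
recurrent network. In one common textbook formulation, the hidden
state $\mathbf{h}_t$ at time step $t$ depends on both the current
input $\mathbf{x}_t$ and the previous state $\mathbf{h}_{t-1}$,
\begin{equation}
  \mathbf{h}_t = \sigma\left(W_x \mathbf{x}_t + W_h \mathbf{h}_{t-1} + \mathbf{b}\right),
\end{equation}
whereas a feedforward network has no such dependence on previous time
steps. The symbols here are generic and not taken from the cited
sources.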

\subsection{Activation Functions}
\label{ssec:theory-activation-functions}



\subsection{Loss Function}
\label{ssec:theory-loss-function}

\subsection{Backpropagation}
\label{ssec:theory-backpropagation}



\section{Object Detection}
\label{sec:background-detection}

@@ -519,6 +721,7 @@ increases in depth. \textcite{girshick2015} argue that \glspl{dpm}
\glspl{cnn} by unrolling each step of the algorithm into a
corresponding \gls{cnn} layer.


\section{Classification}
\label{sec:background-classification}

@@ -1654,4 +1857,7 @@ Estimated 1 page for this section
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% TeX-master: t
%%% End: