Rework template

2020-02-27 12:39:14 +01:00 · 2020-02-27 12:39:14 +01:00 · 0b2d4d8c03
commit 0b2d4d8c03
17 changed files with 2328 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,20 @@
+_minted-main/
+
+main.acn
+main.aux
+main.bbl
+main.bcf
+main.blg
+main.fdb_latexmk
+main.fls
+main.glo
+main.glsdefs
+main.ist
+main.lof
+main.log
+main.lol
+main.out
+main.pdf
+main.run.xml
+main.synctex.gz
+main.toc
--- a/abbrev/acronym.tex
+++ b/abbrev/acronym.tex
@ -0,0 +1,23 @@
+\newacronym	{INSO}	    {INSO}	    {Industrial Software}
+\newacronym	{TU}	    {TU}	    {Technische Universit\"at Wien}
+\newacronym	{ZID}	    {ZID}	    {Zentraler Informatikdienst}
+\newacronym	{KISS}	    {KISS}	    {Keep It Sober and Significant}
+\newacronym {URL}       {URL}       {Uniform Resource Locator}
+\newacronym {URI}       {URI}       {Uniform Resource Identifier}
+\newacronym {WWW}       {WWW}       {World Wide Web}
+\newacronym {HTTP}      {HTTP}      {Hypertext Transfer Protocol}
+\newacronym {HTML}      {HTML}      {Hypertext Markup Language}
+\newacronym {DOM}       {DOM}       {Document Object Model}
+\newacronym {API}       {API}       {Application Programming Interface}
+\newacronym {XHTML}     {XHTML}     {Extensible Hypertext Markup Language}
+\newacronym {XML}       {XML}       {Extensible Markup Language}
+\newacronym {W3C}       {W3C}       {World Wide Web Consortium}
+\newacronym {WHATWG}    {WHATWG}    {Web Hypertext Application Technology Working Group}
+\newacronym {JSON}      {JSON}      {JavaScript Object Notation}
+\newacronym {PII}       {PII}       {Personally Identifiable Information}
+\newacronym {LSO}       {LSO}       {Local Shared Object}
+\newacronym {CSS}       {CSS}       {Cascading Style Sheets}
+\newacronym {RTB}       {RTB}       {Real Time Bidding}
+\newacronym {TLS}       {TLS}       {Transport Layer Security}
+\newacronym {VPN}       {VPN}       {Virtual Private Network}
+\newacronym {ISP}       {ISP}       {Internet Service Provider}
--- a/abbrev/acronym.tex.aux
+++ b/abbrev/acronym.tex.aux
@ -0,0 +1,158 @@
+\relax 
+\providecommand\hyper@newdestlabel[2]{}
+\@setckpt{abbrev/acronym.tex}{
+\setcounter{page}{1}
+\setcounter{equation}{0}
+\setcounter{enumi}{0}
+\setcounter{enumii}{0}
+\setcounter{enumiii}{0}
+\setcounter{enumiv}{0}
+\setcounter{footnote}{0}
+\setcounter{mpfootnote}{0}
+\setcounter{part}{0}
+\setcounter{chapter}{0}
+\setcounter{section}{0}
+\setcounter{subsection}{0}
+\setcounter{subsubsection}{0}
+\setcounter{paragraph}{0}
+\setcounter{subparagraph}{0}
+\setcounter{figure}{0}
+\setcounter{table}{0}
+\setcounter{parentequation}{0}
+\setcounter{su@anzahl}{0}
+\setcounter{LT@tables}{0}
+\setcounter{LT@chunks}{0}
+\setcounter{Item}{0}
+\setcounter{Hfootnote}{0}
+\setcounter{bookmark@seq@number}{0}
+\setcounter{FancyVerbLine}{0}
+\setcounter{linenumber}{1}
+\setcounter{LN@truepage}{0}
+\setcounter{FV@TrueTabGroupLevel}{0}
+\setcounter{FV@TrueTabCounter}{0}
+\setcounter{FV@HighlightLinesStart}{0}
+\setcounter{FV@HighlightLinesStop}{0}
+\setcounter{FancyVerbLineBreakLast}{0}
+\setcounter{float@type}{16}
+\setcounter{minted@FancyVerbLineTemp}{0}
+\setcounter{minted@pygmentizecounter}{0}
+\setcounter{listing}{0}
+\setcounter{lstnumber}{1}
+\setcounter{tabx@nest}{0}
+\setcounter{listtotal}{0}
+\setcounter{listcount}{0}
+\setcounter{liststart}{0}
+\setcounter{liststop}{0}
+\setcounter{citecount}{0}
+\setcounter{citetotal}{0}
+\setcounter{multicitecount}{0}
+\setcounter{multicitetotal}{0}
+\setcounter{instcount}{0}
+\setcounter{maxnames}{3}
+\setcounter{minnames}{3}
+\setcounter{maxitems}{3}
+\setcounter{minitems}{1}
+\setcounter{citecounter}{0}
+\setcounter{maxcitecounter}{0}
+\setcounter{savedcitecounter}{0}
+\setcounter{uniquelist}{0}
+\setcounter{uniquename}{0}
+\setcounter{refsection}{0}
+\setcounter{refsegment}{0}
+\setcounter{maxextratitle}{0}
+\setcounter{maxextratitleyear}{0}
+\setcounter{maxextraname}{2}
+\setcounter{maxextradate}{0}
+\setcounter{maxextraalpha}{0}
+\setcounter{abbrvpenalty}{50}
+\setcounter{highnamepenalty}{50}
+\setcounter{lownamepenalty}{25}
+\setcounter{maxparens}{3}
+\setcounter{parenlevel}{0}
+\setcounter{mincomprange}{10}
+\setcounter{maxcomprange}{100000}
+\setcounter{mincompwidth}{1}
+\setcounter{afterword}{0}
+\setcounter{savedafterword}{0}
+\setcounter{annotator}{0}
+\setcounter{savedannotator}{0}
+\setcounter{author}{0}
+\setcounter{savedauthor}{0}
+\setcounter{bookauthor}{0}
+\setcounter{savedbookauthor}{0}
+\setcounter{commentator}{0}
+\setcounter{savedcommentator}{0}
+\setcounter{editor}{0}
+\setcounter{savededitor}{0}
+\setcounter{editora}{0}
+\setcounter{savededitora}{0}
+\setcounter{editorb}{0}
+\setcounter{savededitorb}{0}
+\setcounter{editorc}{0}
+\setcounter{savededitorc}{0}
+\setcounter{foreword}{0}
+\setcounter{savedforeword}{0}
+\setcounter{holder}{0}
+\setcounter{savedholder}{0}
+\setcounter{introduction}{0}
+\setcounter{savedintroduction}{0}
+\setcounter{namea}{0}
+\setcounter{savednamea}{0}
+\setcounter{nameb}{0}
+\setcounter{savednameb}{0}
+\setcounter{namec}{0}
+\setcounter{savednamec}{0}
+\setcounter{translator}{0}
+\setcounter{savedtranslator}{0}
+\setcounter{shortauthor}{0}
+\setcounter{savedshortauthor}{0}
+\setcounter{shorteditor}{0}
+\setcounter{savedshorteditor}{0}
+\setcounter{labelname}{0}
+\setcounter{savedlabelname}{0}
+\setcounter{institution}{0}
+\setcounter{savedinstitution}{0}
+\setcounter{lista}{0}
+\setcounter{savedlista}{0}
+\setcounter{listb}{0}
+\setcounter{savedlistb}{0}
+\setcounter{listc}{0}
+\setcounter{savedlistc}{0}
+\setcounter{listd}{0}
+\setcounter{savedlistd}{0}
+\setcounter{liste}{0}
+\setcounter{savedliste}{0}
+\setcounter{listf}{0}
+\setcounter{savedlistf}{0}
+\setcounter{location}{0}
+\setcounter{savedlocation}{0}
+\setcounter{organization}{0}
+\setcounter{savedorganization}{0}
+\setcounter{origlocation}{0}
+\setcounter{savedoriglocation}{0}
+\setcounter{origpublisher}{0}
+\setcounter{savedorigpublisher}{0}
+\setcounter{publisher}{0}
+\setcounter{savedpublisher}{0}
+\setcounter{language}{0}
+\setcounter{savedlanguage}{0}
+\setcounter{origlanguage}{0}
+\setcounter{savedoriglanguage}{0}
+\setcounter{pageref}{0}
+\setcounter{savedpageref}{0}
+\setcounter{textcitecount}{0}
+\setcounter{textcitetotal}{0}
+\setcounter{textcitemaxnames}{0}
+\setcounter{biburlbigbreakpenalty}{100}
+\setcounter{biburlbreakpenalty}{200}
+\setcounter{biburlnumpenalty}{0}
+\setcounter{biburlucpenalty}{0}
+\setcounter{biburllcpenalty}{0}
+\setcounter{smartand}{1}
+\setcounter{bbx:relatedcount}{0}
+\setcounter{bbx:relatedtotal}{0}
+\setcounter{cbx@tempcnta}{0}
+\setcounter{cbx@tempcntb}{0}
+\setcounter{section@level}{0}
+\setcounter{lstlisting}{0}
+}
--- a/bibliography/references.bib
+++ b/bibliography/references.bib
--- a/chapters/abstract-de.tex
+++ b/chapters/abstract-de.tex
@ -0,0 +1,35 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter*{Kurzfassung}
+
+\emph{Über diese Vorlage:}
+Dieses Template dient als Vorlage für die Erstellung einer wissenschaftlichen
+Arbeit am INSO. Individuelle Erweiterungen, Strukturanpassungen und
+Layout-Veränderungen können und sollen selbstverständlich nach persönlichem
+Ermessen und in Rücksprache mit Ihrem Betreuer vorgenommen werden.
+
+\emph{Aufbau}:
+In der Kurzfassung werden auf einer 3/4 bis maximal einer Seite die Kernaussagen
+der Diplomarbeit zusammengefasst. Dabei sollte zunächst die Motivation/der
+Kontext der vorliegenden Arbeit dargestellt werden, und dann kurz die
+Frage-/Problemstellung erläutert werden, max. 1 Absatz! Im nächsten Absatz auf
+die Methode/Verfahrensweise/das konkrete Fallbeispiel eingehen, mit deren Hilfe
+die Ergebnisse erzielt wurden. Im Zentrum der Kurzfassung stehen die zentralen
+eigenen Ergebnisse der Arbeit, die den Wert der vorliegenden wissenschaftlichen
+Arbeit ausmachen. Hier auch, wenn vorhanden, eigene Publikationen erwähnen.
+
+\emph{Wichtig: Verständlichkeit!}
+Die Kurzfassung soll für Leser verständlich sein, denen das Gebiet der
+Arbeit fremd ist. Deshalb Abkürzungen immer zuerst ausschreiben, in Klammer
+dazu die Erklärung: z.B.: \enquote{Im Rahmen der vorliegenden Arbeit werden
+Non Governmental-Organisationen (NGOs) behandelt, \ldots}. In \LaTeX{} wird
+dies bereits automatisch durch Verwenden des Befehls \verb|\ac| erreicht.
+Für Details siehe Paket \texttt{glossaries}.
+
+\bigskip
+
+\section*{Schlüsselwörter}
+
+\end{document}
--- a/chapters/abstract-en.tex
+++ b/chapters/abstract-en.tex
@ -0,0 +1,29 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter*{Abstract}
+
+\emph{About this template}:
+This template helps with writing a scientific document at INSO. Users of this
+template are welcome to make individual modifications, extensions, and changes
+to layout and typography in consultation with their advisor.
+
+\emph{Writing an abstract}: The abstract summarizes the most important
+information within less than one page. Within the first paragraph, present the
+motivation and context for your work, followed by the specific aims. In the next
+paragraph, describe your methodology / approach, and / or the specific case you
+are working on. The third paragraph describes the results and the contribution
+of your work.
+
+\emph{Comprehensibility}: People with different backgrounds who are new to
+your area of work should be able to understand the abstract. Therefore, acronyms
+should only be used after their full definition has been given. E.g., ``This
+work relates to non-governmental organizations (NGOs), \ldots''.
+
+\bigskip
+
+\section*{Keywords}
+%Keyword, important, SubjectOfMyPaper, FieldOfWork.
+
+\end{document}
--- a/chapters/conclusion.tex
+++ b/chapters/conclusion.tex
@ -0,0 +1,7 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter{Conclusion}
+
+\end{document}
--- a/chapters/defences.tex
+++ b/chapters/defences.tex
@ -0,0 +1,8 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter{Defences against Tracking}
+\label{chap:defences against tracking}
+
+\end{document}
--- a/chapters/developments.tex
+++ b/chapters/developments.tex
@ -0,0 +1,7 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter{Future Tracking Ecosystem Developments}
+
+\end{document}
--- a/chapters/implications.tex
+++ b/chapters/implications.tex
@ -0,0 +1,7 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter{Implications of Tracking}
+
+\end{document}
--- a/chapters/introduction.tex
+++ b/chapters/introduction.tex
@ -0,0 +1,16 @@
+\documentclass[../main.tex]{subfiles}
+
+\begin{document}
+
+\chapter{Introduction}
+
+\section{Terms and Scope}
+\label{sec:Terms and Scope}
+
+\section{Background and Related Work}
+\label{sec:Background and Related Work}
+
+\section{Structure of the Thesis}
+\label{sec:Structure of the Thesis}
+
+\end{document}
--- a/chapters/methods.tex
+++ b/chapters/methods.tex
@ -0,0 +1,551 @@
+\documentclass[../main.tex]{subfiles}
+\externaldocument{defences}
+
+\begin{document}
+
+\chapter{Tracking Methods}
+\label{chap:tracking methods}
+
+This chapter will go into detail about various tracking methods that have been
+used during the history of the web. It is important to note that some of those
+approaches to tracking date back to when the World Wide Web was still in its
+early development stages. Knowing where the techniques come from helps in
+correctly judging the impact they had and still have on the Internet as we use
+it today. Furthermore, knowledge about the past allows for better predictions of
+future changes in the tracking ecosystem.
+
+To aid in understanding how they work and where they fit in the tracking
+landscape, three different categories are identified and presented:
+session-based, storage-based and cache-based tracking methods. Each category
+uses different mechanisms and technologies to enable tracking of users. What
+most of them have in common is that they try to place unique identifiers in
+different places, which can then be read on subsequent visits. Thus, a
+chronological ordering of events enables interested parties to infer not only
+usage statistics but also specific data about the entities behind those
+identifiers.
+
+\section{Session-based Tracking Methods}
+\label{sec:session-based tracking methods}
+
+One of the simplest and most used forms of tracking on the Internet relies on
+sessions. Since HTTP is a stateless protocol, web servers cannot by default keep
+track of any previous client requests. In order to implement specific features
+such as personalized advertising, some means to save current and recall previous
+states must be used. For this functionality, sessions were introduced. Sessions
+represent a temporary and interactive exchange of information between two
+parties. Due to their temporary nature, they have to be `brought up' at some
+point and `torn down' at a later point in time. It is not specified, however,
+how long the period between establishing and stopping a session has to be. It
+could be only for a single browser session and terminated by the user manually,
+or it could be for as long as a year.
+
+
+\subsection{Passing Information in URLs}
+\label{subsec:passing information in urls}
+
+\glspl{URL} were first proposed by Berners-Lee in 1994
+\cite{berners-leeUniformResourceLocators1994} and are based on \glspl{URI}
+\cite{berners-leeUniversalResourceIdentifiers1994}. The latter specifies a way
+to uniquely identify a particular resource. The former extends the \gls{URI}
+specification to include where and how a particular resource can be found.
+\glspl{URI} consist of multiple parts:
+
+\begin{enumerate}
+  \item a scheme (in some cases a specific protocol),
+  \item an optional authority (network host or domain name),
+  \item a path (a specific location on that host),
+  \item an optional query and
+  \item an optional fragment preceded by a hash sign (a sub-resource pointing
+    to a specific location within the resource).
+\end{enumerate}
+
+To access a section called \texttt{introduction} in a blog post named
+\texttt{blogpost} on a host with the domain name \texttt{example.com} over the
+\gls{HTTP}, a user might use the following \gls{URI}:
+
+\begin{verbatim}
+http://example.com/blogpost/#introduction
+\end{verbatim}
+
+Even though \glspl{URI} and \glspl{URL} are two different things, they are
+mostly used interchangeably today. Especially non-technical people refer to an
+address on the \gls{WWW} simply as a \gls{URL}.
+
+The optional query parameter is in most cases constructed of multiple
+\texttt{(key,value)} pairs, separated by delimiters such as \texttt{\&} and
+\texttt{;}. In the tracking context, query parameters can be used to pass
+information (e.g. unique identifiers) to the resource that is to be accessed by
+appending a unique string to all the links within the downloaded page. Since
+requests to pages are generally logged by the server, requesting multiple pages
+with the same unique identifier leaves a trail behind that can be used to
+compile a browsing history. Sharing information with other parties is not only
+limited to unique identifiers. \gls{URL} parameters can also be used to pass the
+referrer of a web page containing a query that has been submitted by the user.
+\citeauthor{falahrastegarTrackingPersonalIdentifiers2016} demonstrate such an
+example where an advertisement tracker logs a user's browsing history by storing
+the referrer into a \texttt{(key,value)} pair
+\cite[p.~37]{falahrastegarTrackingPersonalIdentifiers2016}. Other possibilities
+include encoding geographical data, network properties, user information (e.g.,
+e-mails) and authentication credentials.
+\citeauthor{westMeasuringPrivacyDisclosures2014} conducted a survey concerning
+the use of \gls{URL} Query Strings and found it to be in widespread use on the
+web \cite{westMeasuringPrivacyDisclosures2014}.
+
+\subsection{Hidden Form Fields}
+\label{subsec:hidden form fields}
+
+The \gls{HTML} provides a specification for form elements, which allow users to
+submit information (e.g., for authentication) to the server via POST or GET
+methods. Normally, a user would input data into a form and on clicking
+\emph{submit} the input would be sent to the server. Sometimes it is necessary
+to include additional information that the user did not enter. For this reason
+there exist \emph{hidden} web forms. Hidden web forms do not show on the website
+and therefore the user cannot enter any information. Similar to \gls{URL}
+parameters, the value parameter in a hidden field contains additional
+information like the user's preferred language for example. Since almost
+anything can be sent in a value parameter, hidden form fields present another
+way to maintain a session. A parameter containing a unique identifier will be
+sent with the data the user has submitted to the server. The server can then
+match the action the user took with the identifier. In case the server already
+knows that specific identifier from a previous interaction with the user, the
+gained information can now be added to the user's browsing profile. An example
+of a hidden web form is given in Listing~\ref{lst:hidden web form}, which has
+been adapted from \cite{InputFormInput}. In Line 15 a hidden web field is
+created and the \texttt{value} field is set by the server to contain a unique
+user identifier. Once the \emph{submit} button has been clicked, the identifier
+is sent to the server along with the data the user has filled in.
+
+\begin{listing}
+    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{html}{code/hidden-web-form.html}
+    \caption{Example of an \gls{HTML} form containing a hidden field with
+    \texttt{id=userId}. The id is set by the web server dynamically so that every
+visitor has his/her unique identifier attached to the form.}
+    \label{lst:hidden web form}
+\end{listing}
+
+\subsection{HTTP Referer}
+\label{subsec:http referer}
+
+Providers of web services often want to know where visitors to their website
+come from to understand more about their users and their browsing habits. The
+\gls{HTTP} specification accounts for this by introducing the \emph{\gls{HTTP}
+Referer field} [\emph{sic}] in the header. By checking the referrer, the server
+can see where the request came from. In practice, a user clicks on a link on a
+web page and the current web page is sent as a \gls{URL} in the \gls{HTTP}
+Referer field. The header with the referrer information gets attached to the
+\gls{HTTP} request which is sent to the server. The server responds with the
+requested web page and can establish a link from the original web page to the
+new web page.  When applied to a majority of the requests on a site, the
+resulting data can be analyzed for promotional and statistical purposes.
+\citeauthor{malandrinoPrivacyAwarenessInformation2013} have shown that the
+\gls{HTTP} Referer is one of the most critical factors in leaking \gls{PII}
+\cite{malandrinoPrivacyAwarenessInformation2013}, because leakage of information
+relating to users' health has been identified as the most severe in terms of
+identifiability of users on the web.
+
+\subsection{Explicit Authentication}
+\label{subsec:explicit authentication}
+
+Explicit authentication requires a user to \emph{explicitly} log in or register
+to the website. This way, specific resources are only available to the user when
+he or she has authenticated themselves to the service. Actions taken on an
+authenticated user account are tied to that account and crafting a personal
+profile is more or less a built-in function in this case. Since merely asking a
+user to authenticate is a simple method, the extent to which it can be used is
+limited. Logged in users are generally not logged in across different browser
+sessions, unless they are using cookies to do so (see section~\ref{subsec:http
+cookies}), therefore limiting tracking to one session at a time. Furthermore,
+always requiring a logged in state can be a tiring task for users, because they
+have to be authenticated every time they visit a particular service. This can
+potentially pose a usability problem where users simply stop using the service
+or go to considerable lengths to avoid logging in. This largely depends on a
+cost-benefit analysis the users subconsciously undertake \todo{Add citation}.
+The third factor where this method is lacking concerns the awareness of the
+user being tracked. Since tracking users depends on them actively logging in to
+the service, tracking them transparently is impossible. Even though most
+tracking efforts are not detected by the average user \todo{Add citation}, it
+is known that actions
+taken on an account are logged to provide better service through service
+optimization and profile personalization.
+
+Creating an account on a website to use its services to their full extent can
+be beneficial in some cases. Facebook, for example, allows its users to
+configure what they want to share with the public and their friends. Research
+has shown, however, that managing which posts get shown to whom is not as
+straightforward as one might think.
+\todo{Wrong chapter?} \citeauthor{liuAnalyzingFacebookPrivacy2011}
+\cite{liuAnalyzingFacebookPrivacy2011} conducted a survey where they asked
+Facebook users about their desired privacy and visibility settings and
+cross-checked them with the actual settings they have used for their posts. The
+results showed that in only 37\% of cases the users' expectations match the
+reality. Additionally, 36\% of content is left on the default privacy settings
+which set the visibility of posts to public, meaning that any Facebook user can
+view them.
+
+\subsection{window.name DOM Property}
+\label{subsec:window.name dom property}
+
+The \gls{DOM} is a platform and language agnostic \gls{API} which defines the
+logical structure of web documents (i.e., \gls{HTML}, \gls{XHTML} and \gls{XML})
+and the way they are accessed and manipulated. The \gls{DOM} was originally
+introduced by Netscape at the same time as JavaScript as the \gls{DOM} Level 0.
+The first recommendation (\gls{DOM} Level 1) was released in 1998 by the
+\gls{W3C} \gls{DOM} working group \cite{w3cDocumentObjectModel1998} which
+published its final recommendation (\gls{DOM} Level 3) in 2004. Since then the
+\gls{WHATWG} took over and in 2015 published the \gls{DOM} Level 4 standard
+\cite{whatwgDOMLivingStandard2020} which replaces the Level 3 specification. It
+works by organizing all objects in a document in a tree structure which allows
+individual parts to be altered when a specific event happens (e.g., user
+interaction). Furthermore, each object has properties which are either applied to
+all \gls{HTML} elements or only to a subset of all elements.
+
+One useful property for tracking purposes is the \texttt{window.name} property.
+Its original intention was to allow client-side JavaScript to get or set the
+name of the current window. Since windows do not have to have names, the
+window.name property is being used mostly for setting targets for hyperlinks and
+forms. Modern browsers allow storing up to two megabytes of data in the
+window.name property, which makes it a viable option for using it as a data
+storage or---more specifically---maintaining session variables. In order to
+store multiple variables in the window.name property, the values have first to
+be packed in some way because only a single string is allowed. A \gls{JSON}
+stringifier converts a normal string into a \gls{JSON} string which is then
+ready to be stored in the DOM property. Additionally, serializers can also
+convert JavaScript objects into a \gls{JSON} string. Normally JavaScript's
+same-origin policy prohibits making requests to servers in another domain, but
+the window.name property is accessible from other domains and resistant to page
+reloads. Maintaining a session across domains and without cookies is therefore
+possible and multiple implementations exist
+\cite{frankSessionVariablesCookies2008,zypWindowNameTransport2008}.
+
+\section{Storage-based Tracking Methods}
+\label{sec:storage-based tracking methods}
+
+Storage-based tracking methods are different to session-based tracking methods
+in that they try to store information on the client's computer not only for
+single sessions but for as long as desired. The following methods can be used to
+store session data as well but are not limited to that use case. They generally
+enable more advanced tracking approaches because they have information about the
+current browser instance and the operating system the browser is running on. Due
+to their nature of residing on the user's computer, they are in most cases
+harder to circumvent, especially when two or more methods are combined resulting
+in better resilience against simple defences.
+
+\subsection{HTTP Cookies}
+\label{subsec:http cookies}
+
+A method which is most often associated with tracking on the Internet is
+tracking with \gls{HTTP} cookies. Cookies are small files that are placed in the
+browser's storage on the user's computer. They are limited to four kilobytes in
+size and are generally used to identify and authenticate users and to store
+website preferences. They were introduced to the web to allow stateful
+information to be stored because the \gls{HTTP} is a stateless protocol and
+therefore does not have this capability. It is also a way of reducing the
+server's load by not having to recompute states every time a user visits a
+website. Shopping cart functionality for example can thus be implemented by
+setting a cookie in the user's browser, saving the items which are currently
+added to the shopping cart and giving the user the possibility to resume
+shopping at a later point provided that they do not delete their cookies. With
+the introduction of cookies, advertising companies could reidentify users by
+placing unique identifiers in the browser and reading them on subsequent visits.
+The first standard for cookies was published in 1997
+\cite{kristolHTTPStateManagement1997} and has since been updated multiple times
+\cite{kristolHTTPStateManagement2000,barthHTTPStateManagement2011}.
+
+Cookies can be divided into two categories: first party cookies, which are
+created by the domain the user has requested and third party cookies, which are
+placed in the user's browser by other domains that are generally not under the
+control of the first party. Whereas first party cookies are commonly not used
+for tracking but for the aforementioned shopping cart functionality for example
+or enabling e-commerce applications to function properly, third party cookies are
+popular with data brokerage firms (e.g., Datalogix, Experian, Equifax), online
+advertisers (e.g., DoubleClick) and---belonging to both of these categories in
+some cases---social media platforms (e.g., Facebook). The distinction between
+these two categories is not always clear, however. Google Analytics for example
+is considered to be a third party but offers their analytics services by setting
+a first party cookie in the user's browser in addition to loading JavaScript
+snippets from their servers. Therefore, categorizing cookies into those that
+serve third party web content and those that serve first party web content
+presents a more adequate approach.
+
+Cookies are set either by calling scripts that are embedded in a web page (e.g.,
+Google's \texttt{analytics.js}) or by using the \gls{HTTP} Set-Cookie response
+header. Once a request to a web server has been issued, the server can set a
+cookie in the Set-Cookie header and send the response back to the client. On
+the client's side the cookie is stored by the browser and sent with subsequent
+requests to the same domain via the Cookie \gls{HTTP} header. An example of a
+cookie header is given in Listing~\ref{lst:session cookie header}. Because this
+example does not set an expiration date for the cookie, it sets a session
+cookie. Session cookies are limited to the current session and are deleted as
+soon as the session is `torn down'. By adding an expiration date (demonstrated
+in Listing~\ref{lst:permanent cookie header}) or a maximum age, the cookie
+becomes permanent. Additionally, the domain attribute can be specified, meaning
+that cookies which list a different domain than the origin are rejected by the
+user agent \cite[Section 4.1.2.3]{barthHTTPStateManagement2011}. The same-origin
+policy applies to cookies, disallowing access by other domains.
+
+\begin{listing}
+    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/session-cookie-header}
+    \caption{An example of an \gls{HTTP} header setting a session cookie.}
+    \label{lst:session cookie header}
+\end{listing}
+
+\begin{listing}
+    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/permanent-cookie-header}
+    \caption{An example of an \gls{HTTP} header setting a permanent cookie.}
+    \label{lst:permanent cookie header}
+\end{listing}
+
+Distinguishing tracking and non-tracking cookies can be done with high accuracy
+by observing their expiration time and the length of the value field.
+\citeauthor{liTrackAdvisorTakingBack2015} \cite{liTrackAdvisorTakingBack2015}
+demonstrate a supervised learning approach to detecting tracking cookies with
+their tool \emph{TrackAdvisor}. They found that tracking cookies generally have
+a longer expiration time than non-tracking cookies and they need to have a
+sufficiently long value field carrying the unique identifier. Using this method,
+they found that only 10\% of tracking cookies have a lifetime of a single day or
+less while 80\% of non-tracking cookies expire before a day is over.
+Additionally, a length of more than 35 characters in the value field applies to
+80\% of tracking cookies and a value field of less than 35 characters applies to
+80\% of non-tracking cookies. \emph{Cookie Chunking}, where a cookie of larger
+length is split into multiple cookies with smaller length, did not appear to
+affect detection by their method negatively. They also present a site
+measurement of the Alexa Top 10,000 websites, finding that 46\% of websites use
+third party tracking. More recent research
+\cite{gonzalezCookieRecipeUntangling2017} has shown that tracking cookies do not
+have to be long lasting to accumulate data about users. Some cookies---like the
+\texttt{\_\_utma} cookie from Google Analytics for example---save a timestamp of
+the current visit with the unique identifier, thereby allowing the use of
+cookies which last a short time but can afterwards be used in series to
+complete the whole picture. \citeauthor{gonzalezCookieRecipeUntangling2017}
+\cite{gonzalezCookieRecipeUntangling2017} have also found 20\% of observed
+cookies to be \gls{URL} or base64 encoded, making decoding of cookies a
+necessary step for analysis. Furthermore---and contrary to previous
+work---cookie values are found in many more varieties than is assumed by approaches
+that only try to detect cookies by their expiration date and/or character
+length. They also presented an entity based matching algorithm to dissect
+cookies which contain more than a unique identifier. This allows for a better
+understanding and interpretation of complex cookies as they are found in
+advertising networks with a lot of reach (e.g., doubleclick.net). This
+information is particularly useful for building applications that effectively
+detect and block cookies (see chapter~\ref{chap:defences against tracking}).
+
+\subsection{Flash Cookies and Java JNLP PersistenceService}
+\label{subsec:flash cookies and java jnlp persistenceservice}
+
+Flash Cookies are similar to HTTP cookies in that they too are a store of
+information that helps websites and servers to recognize already seen users.
+They are referred to as \glspl{LSO} by Adobe and are part of the Adobe Flash
+Player runtime. Instead of storing data in the browser's storage, they have
+their own storage in a different location on the user's computer. Another
+difference is that they can store not only 4 kilobytes of data but 100 kilobytes
+and they also have no expiration dates by default (\gls{HTTP} cookies live until
+the end of the session unless specified otherwise). Since Flash cookies are not
+created by means the browser normally supports (i.e., \gls{HTTP}, \gls{CSS})
+but by Adobe's Flash Player runtime, browsers are not managing Flash cookies.
+This means that, due to Flash cookies not being tied to a specific browser, they
+function across browsers. This capability makes them an interesting target for
+trackers to store their identifying information in, because out of the box
+browsers initially did not support removing Flash cookies and one had to
+manually set preferences in the \emph{Web Storage Settings panel} provided by
+the Flash Player runtime to get rid of them. Trackers were searching for a new
+way to store identifiers because users became increasingly aware of the dangers
+posed by \gls{HTTP} cookies and reacted by taking countermeasures.
+
+\citeauthor{soltaniFlashCookiesPrivacy2009}
+\cite{soltaniFlashCookiesPrivacy2009} were the first to report on the usage of
+Flash cookies by advertisers and popular websites. While surveying the top 100
+websites at the time, they found that 54\% of them used Flash cookies. Some
+websites were setting Flash cookies as well as \gls{HTTP} cookies with the same
+values, suggesting that Flash cookies serve as backup to \gls{HTTP} cookies.
+Several websites were found using Flash cookies to respawn already deleted
+\gls{HTTP} cookies, even across domains. \citeauthor{acarWebNeverForgets2014}
+\cite{acarWebNeverForgets2014} automated detecting Flash cookies and access to
+them by monitoring file access with the GNU/Linux \emph{strace} tool
+\cite{michaelStraceLinuxManual2020}. This allowed them to acquire data about
+Flash cookies respawning \gls{HTTP} cookies.  Their results show that six of the
+top 100 sites use Flash cookies for respawning.
+
+Even though Flash usage has declined during the last few years thanks to the
+development of the HTML5 standard, \citeauthor{buhovFLASH20thCentury2018}
+\cite{buhovFLASH20thCentury2018} have shown that despite major security flaws,
+Flash content is still served by 7.5\% of the top one million websites (2017).
+The W3Techs Web Technology Survey shows a similar trend and also offers an
+up-to-date measurement of 2.7\% of the top ten million websites for the year
+2020 \cite{w3techsHistoricalYearlyTrends2020}.  Due to the security concerns in
+using Flash, Google's popular video sharing platform YouTube switched by default
+to the HTML5 <video> tag in January of 2015
+\cite{youtubeengineeringYouTubeNowDefaults2015}. In 2017 Adobe announced that they
+will end-of-life Flash at the end of 2020, stopping updates and distribution
+\cite{adobecorporatecommunicationsFlashFutureInteractive2017}. Consequently,
+Chrome 76 and Firefox 69 disabled Flash by default and will drop support
+entirely in 2020.
+
+Similarly to Flash, Java also provides a way of storing data locally on the
+user's computer via the PersistenceService \gls{API}
+\cite{PersistenceServiceJNLPAPI2015}. It is used by the evercookie library
+(section~\ref{subsec:evercookie}) to store values for cookie respawning by
+injecting a Java applet into the \gls{DOM} of a page
+\cite{baumanEvercookieApplet2013}.
+
+\subsection{Evercookie}
+\label{subsec:evercookie}
+
+Evercookie is JavaScript code that can be embedded in websites and allows
+information to be stored permanently on the user's computer. When activated,
+information is not only stored in standard \gls{HTTP} cookies but also in
+various other places, providing redundancy where possible. A full list of
+locations used by Evercookie can be found on the project's GitHub page
+\cite{kamkarSamykEvercookie2020}. In case the user wants to get rid of all
+information stored by visiting a website that uses evercookies, every location
+has to be cleared because if one remains, all the other cookies are restored.
+The cookie deletion mechanisms that are provided by browsers by default do not
+clear all locations where evercookies are stored, which makes evercookie almost
+impossible to avoid. Since evercookie is open source, quietly implementing or
+using it is not easy to do. Additionally, it is reported on the
+project's GitHub page that it might cause severe performance issues in browsers.
+
+Evercookie has been proposed and implemented by
+\citeauthor{kamkarEvercookieVirtuallyIrrevocable2010} in
+\cite{kamkarEvercookieVirtuallyIrrevocable2010}. Multiple surveys have tried to
+quantify the use of evercookie in the wild.
+\citeauthor{acarWebNeverForgets2014} provide a heuristic for detecting
+evercookies stored on the user's computer \cite{acarWebNeverForgets2014} and
+analyze evercookie usage in conjunction with cookie respawning.
+
+\subsection{Cookie Synchronization}
+\label{subsec:cookie synchronization}
+
+When trackers are using cookies to store unique identifiers to track users,
+every tracker assigns a different identifier to the same user, due to the
+same-origin policy disallowing interaction with other trackers. Because of this,
+sharing data between multiple trackers is difficult, since there are no easy
+ways to accurately match an accumulated profile history of one identifier to
+another. This problem has been solved by modern trackers by using a mechanism
+called Cookie Synchronization or Cookie Matching. This technique allows multiple
+trackers to open an information sharing channel between each other without
+necessarily having to know the website the user visits.
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width=1\textwidth]{cookiesyncing}
+    \caption{Cookie Synchronization in practice between two trackers
+    \emph{cloudflare.com} and \emph{google.com}.}
+    \label{fig:cookie synchronization}
+\end{figure}
+
+An example of how Cookie Synchronization works in practice is given in
+Figure~\ref{fig:cookie synchronization}. The two parties that are interested in
+tracking the user are called \emph{cloudflare.com} and \emph{google.com} in this
+example. The user they want to track is called \emph{browser}. \emph{Browser}
+first visits \emph{website1.com} which loads JavaScript from
+\emph{cloudflare.com}. \emph{Cloudflare.com} sets a cookie in the browser with a
+tracking identifier called \emph{userID = 1234}. Next, \emph{browser} visits
+another website called \emph{website2.com} which loads an advertisement banner
+from \emph{google.com}. \emph{Google.com} also sets a cookie with the tracking
+identifier \emph{userID = ABCD}. \emph{Browser} now has two cookies from two
+different providers, each of them knowing the user under a different identifier.
+When \emph{browser} visits a third website called \emph{website3.com} which
+makes a request to \emph{cloudflare.com}, \emph{cloudflare.com} recognizes the
+user by the identifier \emph{userID = 1234} and sends an \gls{HTTP}
+redirect, redirecting \emph{browser} to \emph{google.com}. The redirect also
+contains an \gls{HTTP} Query String (see section~\ref{subsec:passing information
+in urls}) which adds a query like \emph{?userID=1234\&publisher=website3.com}.
+The complete GET request to \emph{google.com} might look like this:
+
+\begin{minted}[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}
+GET /index.html?userID=1234&publisher=website3.com HTTP/1.1
+User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0
+Host: google.com
+Cookie: userID=ABCD
+\end{minted}
+
+\emph{Google.com} therefore not only knows that the user with the identifier
+\emph{userID=ABCD} visited \emph{website3.com} but also that \emph{browser} is
+the same user as \emph{userID=1234}. Since the identifiers can now be traced
+back to the same person, the different cookies have been synchronized, allowing
+the two trackers to exchange information about the user without him or her
+knowing.
+
+Cookie Synchronization has seen widespread adoption especially in \gls{RTB}
+based auctions \cite{olejnikSellingPrivacyAuction2014}.
+\citeauthor{papadopoulosCookieSynchronizationEverything2019}
+\cite{papadopoulosCookieSynchronizationEverything2019} recorded and analyzed the
+browsing habits of 850 users over a time period of one year and found that 97\%
+of users with regular browsing activity were exposed to Cookie Synchronization
+at least once. Furthermore, they found that ``[...] the average user receives
+around 1 synchronization per 68 requests''
+\cite[p.~7]{papadopoulosCookieSynchronizationEverything2019}. In
+\cite{englehardtOnlineTracking1MillionSite2016} the authors crawl the top
+100,000 sites and find that 45 of the top 50 (90\%) third parties and 460 of the
+top 1000 (46\%) use Cookie Synchronization with at least one other party.
+\emph{Doubleclick.net} is at the top, sharing 108 cookies with 118 other third
+parties. \citeauthor{papadopoulosExclusiveHowSynced2018} show in
+\cite{papadopoulosExclusiveHowSynced2018} the threat that Cookie Synchronization
+poses to encrypted \gls{TLS} sessions by performing the cookie-syncing over
+unencrypted \gls{HTTP} even though the original request to the website was
+encrypted. This highlights the serious privacy implications for users of
+\gls{VPN} services trying to safeguard their traffic from a potentially
+malicious \gls{ISP}.
+
+\subsection{Silverlight Isolated Storage}
+\label{subsec:silverlight isolated storage}
+
+Silverlight Isolated Storage can also be used for storing data for tracking
+purposes on the user's computer. It has been compared to Adobe's Flash
+technology as it too requires a plugin from Microsoft to function. Available for
+storage are 100 kilobytes which is the same amount Flash cookies can store.
+Silverlight does not work in the private browsing mode and can only be cleaned
+manually by deleting a hidden directory in the filesystem or by changing
+settings in the Silverlight application. Silverlight's Isolated Storage is one
+of the methods evercookie (section~\ref{subsec:evercookie}) uses to make
+permanent deletion of cookies hard to do and to facilitate cookie respawning.
+Usage of Silverlight has seen a steady decline since 2011 even though it has
+been used by popular video streaming websites such as Netflix
+\cite{NetflixBeginsRollOut2010} and Amazon. Microsoft did not include
+Silverlight support in Windows 8 and announced in a blog post that it will reach
+end-of-life in October of 2021 \cite{SilverlightEndSupport2015}. Usage of Silverlight currently
+hovers around 0.04\% for the top 10 million websites
+\cite{w3techsUsageStatisticsSilverlight2020}.
+
+\subsection{HTML5 Web Storage}
+\label{subsec:html5 web storage}
+
+HTML5 Web Storage comes in three different forms: HTML5 Global Storage, HTML5
+Local Storage and HTML5 Session Storage. It is part of the HTML specification
+\cite{whatwgHTMLStandard2020} and provides means for storing name-value pairs on
+the user's computer. HTML5 Web Storage works similarly to cookies but enables
+developers to manage transactions that are done by the user simultaneously but
+in two different windows. Whereas with cookies the transaction can accidentally
+be recorded twice, HTML5 Web Storage allows multiple windows to access the same
+storage on the user's computer thereby avoiding this problem. In contrast to
+cookies, which are sent to the server every time a request is made, HTML5 Storage
+contents do not get sent to the web server. By default the storage limit is
+configured to be 5 megabytes per origin \cite{whatwgHTMLStandard2020a}. Even
+though this was only a recommendation by the standard, all modern browsers
+adhere to it. More space can be allocated upon asking the user for permission to
+do so.
+
+Global Storage was part of an initial HTML5 draft and is accessible across
+applications. Due to it violating the same-origin policy, most major browsers
+have not implemented Global Storage.
+
+Local Storage does, however, obey the same-origin policy by only allowing the
+originating domain access to its name-value pairs. Every website has its own
+separate storage area which maintains a clear separation of concerns.
+
+\subsection{HTML5 Indexed Database API}
+\label{subsec:html5 indexed database api}
+
+\subsection{Web SQL Database}
+\label{subsec:web sql database}
+
+
+\section{Cache-based Tracking Methods}
+\label{sec:cache-based tracking methods}
+
+\subsection{DNS Cache}
+\label{subsec:dns cache}
+
+\subsection{Browser Cache}
+\label{subsec:browser cache}
+
+\end{document}
--- a/code/hidden-web-form.html
+++ b/code/hidden-web-form.html
@ -0,0 +1,16 @@
+<form>
+  <div>
+    <label for="title">Post title:</label>
+    <input type="text" id="title" name="title" value="blog post">
+  </div>
+  <div>
+    <label for="content">Post content:</label>
+    <textarea id="content" name="content" cols="60" rows="5">
+      Welcome to my blog post!
+    </textarea>
+  </div>
+  <div>
+    <button type="submit">Update post</button>
+  </div>
+  <input type="hidden" id="userId" name="userId" value="5239asbd923fade923da">
+</form>
--- a/code/permanent-cookie-header
+++ b/code/permanent-cookie-header
@ -0,0 +1,5 @@
+HTTP/2.0 200 OK
+Content-type: text/html
+Set-Cookie: cookie1=value1; Expires=Thu, 27 Feb 2020 08:56:00 GMT
+
+[page content]
--- a/code/session-cookie-header
+++ b/code/session-cookie-header
@ -0,0 +1,5 @@
+HTTP/2.0 200 OK
+Content-type: text/html
+Set-Cookie: cookie1=value1
+
+[page content]
--- a/figures/cookiesyncing.pdf
+++ b/figures/cookiesyncing.pdf
--- a/main.tex
+++ b/main.tex
@ -0,0 +1,126 @@
+\documentclass[a4paper,12pt,oneside]{scrreport}
+
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[scaled]{helvet}
+\usepackage{times}
+\usepackage{subfiles}
+\usepackage[english]{babel}
+\usepackage[includeheadfoot,left=3.4cm,right=2.4cm,bottom=1.5cm,top=1.7cm]{geometry}
+\usepackage{graphicx}
+\usepackage{microtype}
+\usepackage{setspace}
+\usepackage{fancyhdr}
+\usepackage[hidelinks]{hyperref}
+\usepackage{xcolor}
+\usepackage{minted}
+\usepackage{listings}
+\usepackage{csquotes}
+\usepackage{xr}
+\usepackage[acronym]{glossaries}
+\usepackage{lastpage}
+
+\glsenablehyper
+
+\setlength{\marginparwidth}{2cm}
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{0.5em}
+
+\usepackage{todonotes}
+
+\fancypagestyle{frontmatter}{%
+    \fancyhead{}
+    \fancyfoot{}
+    \fancyfoot[C]{\thepage}
+    \renewcommand{\headrulewidth}{0pt}
+    \renewcommand{\footrulewidth}{0pt}
+}
+
+\definecolor{light-gray}{gray}{0.95}
+
+\RedeclareSectionCommand[beforeskip=0.5cm,afterskip=1.5cm]{chapter}
+\addtokomafont{chapter}{\normalfont\sffamily\huge}
+\addtokomafont{section}{\normalfont\sffamily\Large}
+\addtokomafont{subsection}{\normalfont\sffamily\large}
+
+\usepackage[backend=biber,style=ieee,urldate=iso,date=iso,seconds=true]{biblatex}
+
+\addbibresource{bibliography/references.bib}
+
+\hypersetup{
+    linkcolor=black,
+    urlcolor=black,
+    citecolor=black,
+    breaklinks=true,
+    colorlinks=true,
+    frenchlinks=true,
+    linktoc = all,
+    pdftitle = {Stateful Web Tracking: Techniques and Countermeasures},
+    pdfauthor = {Tobias Eidelpes}
+}
+
+\pagestyle{fancy}
+
+\renewcommand{\chaptermark}[1]{\markboth{\chaptername\ \thechapter.\ #1}{}}
+\renewcommand{\sectionmark}[1]{\markright{\arabic{chapter}.\arabic{section}.\ #1}}
+\renewcommand	{\headrulewidth}{0.4pt} % draw a 0.4pt rule below the header
+\renewcommand	{\footrulewidth}{0.4pt} % draw a 0.4pt rule above the footer
+
+\fancyhead{}
+
+\fancyhead[L]{\leftmark}
+\fancyhead[R]{\rightmark}
+
+\fancyfoot{}
+\fancyfoot[L]{Stateful Web Tracking: Techniques and Countermeasures}
+\fancyfoot[R]{\thepage \ / \pageref{LastPage}}
+
+\fancypagestyle{plain}{}
+
+\graphicspath{{figures/}{../figures/}}
+
+\setstretch{1.1}
+
+\makeglossaries
+
+\begin{document}
+
+    \input{abbrev/acronym.tex}
+
+    \pagenumbering{roman}
+
+    \subfile{chapters/abstract-de}
+    \thispagestyle{frontmatter}
+
+    \subfile{chapters/abstract-en}
+    \thispagestyle{frontmatter}
+
+    \tableofcontents
+    \thispagestyle{frontmatter}
+
+    \listoffigures
+    \thispagestyle{frontmatter}
+
+    \listoflistings
+    \thispagestyle{frontmatter}
+
+    \printglossaries
+    \thispagestyle{frontmatter}
+
+    \subfile{chapters/introduction}
+
+    \pagenumbering{arabic}
+
+    \subfile{chapters/methods}
+
+    \subfile{chapters/defences}
+
+    \subfile{chapters/implications}
+
+    \subfile{chapters/developments}
+
+    \subfile{chapters/conclusion}
+
+    \printbibliography
+
+\end{document}