Rework template

2020-02-27 12:39:14 +01:00 · 2020-02-27 12:39:14 +01:00 · 0b2d4d8c03
commit 0b2d4d8c03
17 changed files with 2328 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,20 @@
 _minted-main/
 main.acn
 main.aux
 main.bbl
 main.bcf
 main.blg
 main.fdb_latexmk
 main.fls
 main.glo
 main.glsdefs
 main.ist
 main.lof
 main.log
 main.lol
 main.out
 main.pdf
 main.run.xml
 main.synctex.gz
 main.toc
--- a/abbrev/acronym.tex
+++ b/abbrev/acronym.tex
@ -0,0 +1,23 @@
 \newacronym	{INSO}	    {INSO}	    {Industrial Software}
 \newacronym	{TU}	    {TU}	    {Technische Universit\"at Wien}
 \newacronym	{ZID}	    {ZID}	    {Zentraler Informatikdienst}
 \newacronym	{KISS}	    {KISS}	    {Keep It Sober and Significant}
 \newacronym {URL}       {URL}       {Uniform Resource Locator}
 \newacronym {URI}       {URI}       {Uniform Resource Identifier}
 \newacronym {WWW}       {WWW}       {World Wide Web}
 \newacronym {HTTP}      {HTTP}      {Hypertext Transfer Protocol}
 \newacronym {HTML}      {HTML}      {Hypertext Markup Language}
 \newacronym {DOM}       {DOM}       {Document Object Model}
 \newacronym {API}       {API}       {Application Programming Interface}
 \newacronym {XHTML}     {XHTML}     {Extensible Hypertext Markup Language}
 \newacronym {XML}       {XML}       {Extensible Markup Language}
 \newacronym {W3C}       {W3C}       {World Wide Web Consortium}
 \newacronym {WHATWG}    {WHATWG}    {Web Hypertext Application Technology Working Group}
 \newacronym {JSON}      {JSON}      {JavaScript Object Notation}
 \newacronym {PII}       {PII}       {Personally Identifiable Information}
 \newacronym {LSO}       {LSO}       {Local Shared Object}
 \newacronym {CSS}       {CSS}       {Cascading Style Sheets}
 \newacronym {RTB}       {RTB}       {Real Time Bidding}
 \newacronym {TLS}       {TLS}       {Transport Layer Security}
 \newacronym {VPN}       {VPN}       {Virtual Private Network}
 \newacronym {ISP}       {ISP}       {Internet Service Provider}
--- a/abbrev/acronym.tex.aux
+++ b/abbrev/acronym.tex.aux
@ -0,0 +1,158 @@
 \relax 
 \providecommand\hyper@newdestlabel[2]{}
 \@setckpt{abbrev/acronym.tex}{
 \setcounter{page}{1}
 \setcounter{equation}{0}
 \setcounter{enumi}{0}
 \setcounter{enumii}{0}
 \setcounter{enumiii}{0}
 \setcounter{enumiv}{0}
 \setcounter{footnote}{0}
 \setcounter{mpfootnote}{0}
 \setcounter{part}{0}
 \setcounter{chapter}{0}
 \setcounter{section}{0}
 \setcounter{subsection}{0}
 \setcounter{subsubsection}{0}
 \setcounter{paragraph}{0}
 \setcounter{subparagraph}{0}
 \setcounter{figure}{0}
 \setcounter{table}{0}
 \setcounter{parentequation}{0}
 \setcounter{su@anzahl}{0}
 \setcounter{LT@tables}{0}
 \setcounter{LT@chunks}{0}
 \setcounter{Item}{0}
 \setcounter{Hfootnote}{0}
 \setcounter{bookmark@seq@number}{0}
 \setcounter{FancyVerbLine}{0}
 \setcounter{linenumber}{1}
 \setcounter{LN@truepage}{0}
 \setcounter{FV@TrueTabGroupLevel}{0}
 \setcounter{FV@TrueTabCounter}{0}
 \setcounter{FV@HighlightLinesStart}{0}
 \setcounter{FV@HighlightLinesStop}{0}
 \setcounter{FancyVerbLineBreakLast}{0}
 \setcounter{float@type}{16}
 \setcounter{minted@FancyVerbLineTemp}{0}
 \setcounter{minted@pygmentizecounter}{0}
 \setcounter{listing}{0}
 \setcounter{lstnumber}{1}
 \setcounter{tabx@nest}{0}
 \setcounter{listtotal}{0}
 \setcounter{listcount}{0}
 \setcounter{liststart}{0}
 \setcounter{liststop}{0}
 \setcounter{citecount}{0}
 \setcounter{citetotal}{0}
 \setcounter{multicitecount}{0}
 \setcounter{multicitetotal}{0}
 \setcounter{instcount}{0}
 \setcounter{maxnames}{3}
 \setcounter{minnames}{3}
 \setcounter{maxitems}{3}
 \setcounter{minitems}{1}
 \setcounter{citecounter}{0}
 \setcounter{maxcitecounter}{0}
 \setcounter{savedcitecounter}{0}
 \setcounter{uniquelist}{0}
 \setcounter{uniquename}{0}
 \setcounter{refsection}{0}
 \setcounter{refsegment}{0}
 \setcounter{maxextratitle}{0}
 \setcounter{maxextratitleyear}{0}
 \setcounter{maxextraname}{2}
 \setcounter{maxextradate}{0}
 \setcounter{maxextraalpha}{0}
 \setcounter{abbrvpenalty}{50}
 \setcounter{highnamepenalty}{50}
 \setcounter{lownamepenalty}{25}
 \setcounter{maxparens}{3}
 \setcounter{parenlevel}{0}
 \setcounter{mincomprange}{10}
 \setcounter{maxcomprange}{100000}
 \setcounter{mincompwidth}{1}
 \setcounter{afterword}{0}
 \setcounter{savedafterword}{0}
 \setcounter{annotator}{0}
 \setcounter{savedannotator}{0}
 \setcounter{author}{0}
 \setcounter{savedauthor}{0}
 \setcounter{bookauthor}{0}
 \setcounter{savedbookauthor}{0}
 \setcounter{commentator}{0}
 \setcounter{savedcommentator}{0}
 \setcounter{editor}{0}
 \setcounter{savededitor}{0}
 \setcounter{editora}{0}
 \setcounter{savededitora}{0}
 \setcounter{editorb}{0}
 \setcounter{savededitorb}{0}
 \setcounter{editorc}{0}
 \setcounter{savededitorc}{0}
 \setcounter{foreword}{0}
 \setcounter{savedforeword}{0}
 \setcounter{holder}{0}
 \setcounter{savedholder}{0}
 \setcounter{introduction}{0}
 \setcounter{savedintroduction}{0}
 \setcounter{namea}{0}
 \setcounter{savednamea}{0}
 \setcounter{nameb}{0}
 \setcounter{savednameb}{0}
 \setcounter{namec}{0}
 \setcounter{savednamec}{0}
 \setcounter{translator}{0}
 \setcounter{savedtranslator}{0}
 \setcounter{shortauthor}{0}
 \setcounter{savedshortauthor}{0}
 \setcounter{shorteditor}{0}
 \setcounter{savedshorteditor}{0}
 \setcounter{labelname}{0}
 \setcounter{savedlabelname}{0}
 \setcounter{institution}{0}
 \setcounter{savedinstitution}{0}
 \setcounter{lista}{0}
 \setcounter{savedlista}{0}
 \setcounter{listb}{0}
 \setcounter{savedlistb}{0}
 \setcounter{listc}{0}
 \setcounter{savedlistc}{0}
 \setcounter{listd}{0}
 \setcounter{savedlistd}{0}
 \setcounter{liste}{0}
 \setcounter{savedliste}{0}
 \setcounter{listf}{0}
 \setcounter{savedlistf}{0}
 \setcounter{location}{0}
 \setcounter{savedlocation}{0}
 \setcounter{organization}{0}
 \setcounter{savedorganization}{0}
 \setcounter{origlocation}{0}
 \setcounter{savedoriglocation}{0}
 \setcounter{origpublisher}{0}
 \setcounter{savedorigpublisher}{0}
 \setcounter{publisher}{0}
 \setcounter{savedpublisher}{0}
 \setcounter{language}{0}
 \setcounter{savedlanguage}{0}
 \setcounter{origlanguage}{0}
 \setcounter{savedoriglanguage}{0}
 \setcounter{pageref}{0}
 \setcounter{savedpageref}{0}
 \setcounter{textcitecount}{0}
 \setcounter{textcitetotal}{0}
 \setcounter{textcitemaxnames}{0}
 \setcounter{biburlbigbreakpenalty}{100}
 \setcounter{biburlbreakpenalty}{200}
 \setcounter{biburlnumpenalty}{0}
 \setcounter{biburlucpenalty}{0}
 \setcounter{biburllcpenalty}{0}
 \setcounter{smartand}{1}
 \setcounter{bbx:relatedcount}{0}
 \setcounter{bbx:relatedtotal}{0}
 \setcounter{cbx@tempcnta}{0}
 \setcounter{cbx@tempcntb}{0}
 \setcounter{section@level}{0}
 \setcounter{lstlisting}{0}
 }
--- a/bibliography/references.bib
+++ b/bibliography/references.bib
--- a/chapters/abstract-de.tex
+++ b/chapters/abstract-de.tex
@ -0,0 +1,35 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter*{Kurzfassung}
 \emph{Über diese Vorlage:}
 Dieses Template dient als Vorlage für die Erstellung einer wissenschaftlichen
 Arbeit am INSO. Individuelle Erweiterungen, Strukturanpassungen und
 Layout-Veränderungen können und sollen selbstverständlich nach persönlichem
 Ermessen und in Rücksprache mit Ihrem Betreuer vorgenommen werden.
 \emph{Aufbau}:
 In der Kurzfassung werden auf einer 3/4 bis maximal einer Seite die Kernaussagen
 der Diplomarbeit zusammengefasst. Dabei sollte zunächst die Motivation/der
 Kontext der vorliegenden Arbeit dargestellt werden, und dann kurz die
 Frage-/Problemstellung erläutert werden, max. 1 Absatz! Im nächsten Absatz auf
 die Methode/Verfahrensweise/das konkrete Fallbeispiel eingehen, mit deren Hilfe
 die Ergebnisse erzielt wurden. Im Zentrum der Kurzfassung stehen die zentralen
 eigenen Ergebnisse der Arbeit, die den Wert der vorliegenden wissenschaftlichen
 Arbeit ausmachen. Hier auch, wenn vorhanden, eigene Publikationen erwähnen.
 \emph{Wichtig: Verständlichkeit!}
 Die Kurzfassung soll für Leser verständlich sein, denen das Gebiet der
 Arbeit fremd ist. Deshalb Abkürzungen immer zuerst ausschreiben, in Klammer
 dazu die Erklärung: z.\,B.\ \enquote{Im Rahmen der vorliegenden Arbeit werden
 Non Governmental-Organisationen (NGOs) behandelt, \ldots}. In \LaTeX{} wird
 diese bereits automatisch durch Verwenden des Befehls \verb|\ac| erreicht.
 Für Details siehe Paket \texttt{glossaries}.
 \bigskip
 \section*{Schlüsselwörter}
 \end{document}
--- a/chapters/abstract-en.tex
+++ b/chapters/abstract-en.tex
@ -0,0 +1,29 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter*{Abstract}
 \emph{About this template}:
 This template helps with writing a scientific document at INSO. Users of this
 template are welcome to make individual modifications, extensions, and changes
 to layout and typography in consultation with their advisor.
 \emph{Writing an abstract}: The abstract summarizes the most important
 information within less than one page. Within the first paragraph, present the
 motivation and context for your work, followed by the specific aims. In the next
 paragraph, describe your methodology / approach, and / or the specific case you
 are working on. The third paragraph describes the results and the contribution
 of your work.
 \emph{Comprehensibility}: People with different backgrounds who are new to
 your area of work should be able to understand the abstract. Therefore, acronyms
 should only be used after their full definition has been given. E.g., ``This work
 relates to non-governmental organizations (NGOs), \ldots''.
 \bigskip
 \section*{Keywords}
 %Keyword, important, SubjectOfMyPaper, FieldOfWork.
 \end{document}
--- a/chapters/conclusion.tex
+++ b/chapters/conclusion.tex
@ -0,0 +1,7 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter{Conclusion}
 \end{document}
--- a/chapters/defences.tex
+++ b/chapters/defences.tex
@ -0,0 +1,8 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter{Defences against Tracking}
 \label{chap:defences against tracking}
 \end{document}
--- a/chapters/developments.tex
+++ b/chapters/developments.tex
@ -0,0 +1,7 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter{Future Tracking Ecosystem Developments}
 \end{document}
--- a/chapters/implications.tex
+++ b/chapters/implications.tex
@ -0,0 +1,7 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter{Implications of Tracking}
 \end{document}
--- a/chapters/introduction.tex
+++ b/chapters/introduction.tex
@ -0,0 +1,16 @@
 \documentclass[../main.tex]{subfiles}
 \begin{document}
 \chapter{Introduction}
 \section{Terms and Scope}
 \label{sec:Terms and Scope}
 \section{Background and Related Work}
 \label{sec:Background and Related Work}
 \section{Structure of the Thesis}
 \label{sec:Structure of the Thesis}
 \end{document}
--- a/chapters/methods.tex
+++ b/chapters/methods.tex
@ -0,0 +1,551 @@
 \documentclass[../main.tex]{subfiles}
 \externaldocument{defences}
 \begin{document}
 \chapter{Tracking Methods}
 \label{chap:tracking methods}
 This chapter will go into detail about various tracking methods that have been
 used during the history of the web. It is important to note that some of those
 approaches to tracking date back to when the World Wide Web was still in its
 early development stages. Knowing where the techniques come from helps in
 correctly judging the impact they had and still have on the Internet as we use
 it today. Furthermore, knowledge about the past allows for better predictions of
 future changes in the tracking ecosystem.
 To aid in understanding how they work and where they fit in the tracking
 landscape, three different categories are identified and presented:
 session-based, storage-based and cache-based tracking methods. Each category
 uses different mechanisms and technologies to enable tracking of users. What
 most of them have in common, is that they try to place unique identifiers in
 different places, which can then be read on subsequent visits. Thus, a
 chronological ordering of events enables interested parties to infer not only
 usage statistics but also specific data about the entities behind those
 identifiers.
 \section{Session-based Tracking Methods}
 \label{sec:session-based tracking methods}
 One of the simplest and most used forms of tracking on the Internet relies on
 sessions. Since HTTP is a stateless protocol, web servers cannot by default keep
 track of any previous client requests. In order to implement specific features
 such as personalized advertising, some means to save current and recall previous
 states must be used. For this functionality, sessions were introduced. Sessions
 represent a temporary and interactive exchange of information between two
 parties. Due to their temporary nature, they have to be `brought up' at some
 point and `torn down' at a later point in time. It is not specified, however,
 how long the period between establishing and stopping a session has to be. It
 could be only for a single browser session and terminated by the user manually,
 or it could be for as long as a year.
 \subsection{Passing Information in URLs}
 \label{subsec:passing information in urls}
 \glspl{URL} were first proposed by Berners-Lee in 1994
 \cite{berners-leeUniformResourceLocators1994} and are based on \glspl{URI}
 \cite{berners-leeUniversalResourceIdentifiers1994}. The latter specifies a way
 to uniquely identify a particular resource. The former extends the \gls{URI}
 specification to include where and how a particular resource can be found.
 \glspl{URI} consist of multiple parts:
 \begin{enumerate}
  \item a scheme (in some cases a specific protocol),
  \item an optional authority (network host or domain name),
  \item a path (a specific location on that host),
  \item an optional query and
  \item an optional fragment preceded by a number sign \texttt{\#} (a
    subresource pointing to a specific location within the resource).
 \end{enumerate}
 To access a section called \texttt{introduction} in a blog post named
 \texttt{blogpost} on a host with the domain name \texttt{example.com} over the
 \gls{HTTP}, a user might use the following \gls{URI}:
 \begin{verbatim}
 http://example.com/blogpost/#introduction
 \end{verbatim}
 Even though \glspl{URI} and \glspl{URL} are two different things, they are
 mostly used interchangeably today. Especially non-technical people refer to an
 address on the \gls{WWW} simply as a \gls{URL}.
 The optional query parameter is in most cases constructed of multiple
 \texttt{(key,value)} pairs, separated by delimiters such as \texttt{\&} and
 \texttt{;}. In the tracking context, query parameters can be used to pass
 information (e.g., unique identifiers) to the resource that is to be accessed by
 appending a unique string to all the links within the downloaded page. Since
 requests to pages are generally logged by the server, requesting multiple pages
 with the same unique identifier leaves a trail behind that can be used to
 compile a browsing history. Sharing information with other parties is not
 limited to unique identifiers. \gls{URL} parameters can also be used to pass the
 referrer of a web page containing a query that has been submitted by the user.
 \citeauthor{falahrastegarTrackingPersonalIdentifiers2016} demonstrate such an
 example where an advertisement tracker logs a user's browsing history by storing
 the referrer into a \texttt{(key,value)} pair
 \cite[p.~37]{falahrastegarTrackingPersonalIdentifiers2016}. Other possibilities
 include encoding geographical data, network properties, user information (e.g.,
 e-mails) and authentication credentials.
 \citeauthor{westMeasuringPrivacyDisclosures2014} conducted a survey concerning
 the use of \gls{URL} Query Strings and found it to be in widespread use on the
 web \cite{westMeasuringPrivacyDisclosures2014}.
 \subsection{Hidden Form Fields}
 \label{subsec:hidden form fields}
 The \gls{HTML} provides a specification for form elements, which allow users to
 submit information (e.g., for authentication) to the server via POST or GET
 methods. Normally, a user would input data into a form and on clicking
 \emph{submit} the input would be sent to the server. Sometimes it is necessary
 to include additional information that the user did not enter. For this reason
 there exist \emph{hidden} web forms. Hidden web forms do not show on the website
 and therefore the user cannot enter any information. Similar to \gls{URL}
 parameters, the value parameter in a hidden field contains additional
 information like the user's preferred language for example. Since almost
 anything can be sent in a value parameter, hidden form fields present another
 way to maintain a session. A parameter containing a unique identifier will be
 sent with the data the user has submitted to the server. The server can then
 match the action the user took with the identifier. In case the server already
 knows that specific identifier from a previous interaction with the user, the
 gained information can now be added to the user's browsing profile. An example
 of a hidden web form is given in Listing~\ref{lst:hidden web form}, which has
 been adapted from \cite{InputFormInput}. In Line 15 a hidden web field is
 created and the \texttt{value} field is set by the server to contain a unique
 user identifier. Once the \emph{submit} button has been clicked, the identifier
 is sent to the server along with the data the user has filled in.
 \begin{listing}
    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{html}{code/hidden-web-form.html}
    \caption{Example of an \gls{HTML} form containing a hidden field with
    \texttt{id=userId}. The id is set by the web server dynamically so that every
 visitor has his/her unique identifier attached to the form.}
    \label{lst:hidden web form}
 \end{listing}
 \subsection{HTTP Referer}
 \label{subsec:http referer}
 Providers of web services often want to know where visitors to their website
 come from to understand more about their users and their browsing habits. The
 \gls{HTTP} specification accounts for this by introducing the \emph{\gls{HTTP}
 Referer field} [\emph{sic}] in the header. By checking the referrer, the server
 can see where the request came from. In practice, a user clicks on a link on a
 web page and the current web page is sent as a \gls{URL} in the \gls{HTTP}
 Referer field. The header with the referrer information gets attached to the
 \gls{HTTP} request which is sent to the server. The server responds with the
 requested web page and can establish a link from the original web page to the
 new web page.  When applied to a majority of the requests on a site, the
 resulting data can be analyzed for promotional and statistical purposes.
 \citeauthor{malandrinoPrivacyAwarenessInformation2013} have shown that the
 \gls{HTTP} Referer is one of the most critical factors in leaking \gls{PII}
 \cite{malandrinoPrivacyAwarenessInformation2013}, because leakage of information
 relating to users' health has been identified as the most severe in terms of
 identifiability of users on the web.
 \subsection{Explicit Authentication}
 \label{subsec:explicit authentication}
 Explicit authentication requires a user to \emph{explicitly} log in or register
 to the website. This way, specific resources are only available to the user when
 he or she has authenticated themselves to the service. Actions taken on an
 authenticated user account are tied to that account and crafting a personal
 profile is more or less a built-in function in this case. Since merely asking a
 user to authenticate is a simple method, the extent to which it can be used is
 limited. Logged in users are generally not logged in across different browser
 sessions, unless they are using cookies to do so (see section~\ref{subsec:http
 cookies}), therefore limiting tracking to one session at a time. Furthermore,
 always requiring a logged in state can be a tiring task for users, because they
 have to be authenticated every time they visit a particular service. This can
 potentially pose a usability problem where users simply stop using the service
 or go to considerable lengths to avoid logging in. This largely depends on a
 cost-benefit analysis the users subconsciously undertake \todo{Add citation}. The third
 factor where this method is lacking concerns the awareness of the user being
 tracked. Since tracking users depends on them actively logging in to the
 service, tracking them transparently is impossible. Even though most tracking
 efforts are not detected by the average user \todo{Add citation}, it is known that actions
 taken on an account are logged to provide better service through service
 optimization and profile personalization.
 Creating an account on a website to use its services to their full extent can
 be beneficial in some cases. Facebook, for example, allows its users to
 configure what they want to share with the public and their friends. Research
 has shown, however, that managing which posts get shown to whom is not as
 straightforward as one might think.
 \todo{Wrong chapter?} \citeauthor{liuAnalyzingFacebookPrivacy2011}
 \cite{liuAnalyzingFacebookPrivacy2011} conducted a survey where they asked
 Facebook users about their desired privacy and visibility settings and
 cross-checked them with the actual settings they have used for their posts. The
 results showed that in only 37\% of cases the users' expectations match the
 reality. Additionally, 36\% of content is left on the default privacy settings
 which set the visibility of posts to public, meaning that any Facebook user can
 view them.
 \subsection{window.name DOM Property}
 \label{subsec:window.name dom property}
 The \gls{DOM} is a platform- and language-agnostic \gls{API} which defines the
 logical structure of web documents (i.e., \gls{HTML}, \gls{XHTML} and \gls{XML})
 and the way they are accessed and manipulated. The \gls{DOM} was originally
 introduced by Netscape at the same time as JavaScript as the \gls{DOM} Level 0.
 The first recommendation (\gls{DOM} Level 1) was released in 1998 by the
 \gls{W3C} \gls{DOM} working group \cite{w3cDocumentObjectModel1998} which
 published its final recommendation (\gls{DOM} Level 3) in 2004. Since then, the
 \gls{WHATWG} has taken over and in 2015 published the \gls{DOM} Level 4 standard
 \cite{whatwgDOMLivingStandard2020} which replaces the Level 3 specification. The
 \gls{DOM} works by organizing all objects in a document in a tree structure which allows
 individual parts to be altered when a specific event happens (e.g., user
 interaction). Furthermore, each object has properties which are either applied to
 all \gls{HTML} elements or only to a subset of all elements.
 One useful property for tracking purposes is the \texttt{window.name} property.
 Its original intention was to allow client-side JavaScript to get or set the
 name of the current window. Since windows do not have to have names, the
 \texttt{window.name} property is being used mostly for setting targets for
 hyperlinks and forms. Modern browsers allow storing up to two megabytes of data
 in the \texttt{window.name} property, which makes it a viable option for using
 it as data storage or---more specifically---maintaining session variables. In
 order to store multiple variables in the \texttt{window.name} property, the
 values first have to be packed in some way because only a single string is
 allowed. A \gls{JSON} stringifier converts a normal string into a \gls{JSON}
 string which is then ready to be stored in the \gls{DOM} property. Additionally,
 serializers can also convert JavaScript objects into a \gls{JSON} string.
 Normally JavaScript's same-origin policy prohibits making requests to servers
 in another domain, but the \texttt{window.name} property is accessible from
 other domains and resistant to page reloads. Maintaining a session across
 domains and without cookies is therefore
 possible and multiple implementations exist
 \cite{frankSessionVariablesCookies2008,zypWindowNameTransport2008}.
 \section{Storage-based Tracking Methods}
 \label{sec:storage-based tracking methods}
 Storage-based tracking methods are different to session-based tracking methods
 in that they try to store information on the client's computer not only for
 single sessions but for as long as desired. The following methods can be used to
 store session data as well but are not limited to that use case. They generally
 enable more advanced tracking approaches because they have information about the
 current browser instance and the operating system the browser is running on. Due
 to their nature of residing on the user's computer, they are in most cases
 harder to circumvent, especially when two or more methods are combined resulting
 in better resilience against simple defences.
 \subsection{HTTP Cookies}
 \label{subsec:http cookies}
 A method which is most often associated with tracking on the Internet is
 tracking with \gls{HTTP} cookies. Cookies are small files that are placed in the
 browser's storage on the user's computer. They are limited to four kilobytes in
 size and are generally used to identify and authenticate users and to store
 website preferences. They were introduced to the web to allow stateful
 information to be stored because the \gls{HTTP} is a stateless protocol and
 therefore does not have this capability. It is also a way of reducing the
 server's load by not having to recompute states every time a user visits a
 website. Shopping cart functionality for example can thus be implemented by
 setting a cookie in the user's browser, saving the items which are currently
 added to the shopping cart and giving the user the possibility to resume
 shopping at a later point provided that they do not delete their cookies. With
 the introduction of cookies, advertising companies could reidentify users by
 placing unique identifiers in the browser and reading them on subsequent visits.
 The first standard for cookies was published in 1997
 \cite{kristolHTTPStateManagement1997} and has since been updated multiple times
 \cite{kristolHTTPStateManagement2000,barthHTTPStateManagement2011}.
 Cookies can be divided into two categories: first party cookies, which are
 created by the domain the user has requested and third party cookies, which are
 placed in the user's browser by other domains that are generally not under the
 control of the first party. Whereas first party cookies are commonly not used
 for tracking but for the aforementioned shopping cart functionality for example
 or enabling e-commerce applications to function properly, third party cookies are
 popular with data brokerage firms (e.g., Datalogix, Experian, Equifax), online
 advertisers (e.g., DoubleClick) and---belonging to both of these categories in
 some cases---social media platforms (e.g., Facebook). The distinction between
 these two categories is not always clear, however. Google Analytics for example
 is considered to be a third party but offers their analytics services by setting
 a first party cookie in the user's browser in addition to loading JavaScript
 snippets from their servers. Therefore, categorizing cookies into those that
 serve third party web content and those that serve first party web content
 presents a more adequate approach.
 Cookies are set either by calling scripts that are embedded in a web page (e.g.,
 Google's \texttt{analytics.js}) or by using the \gls{HTTP} Set-Cookie response
 header. Once a request to a web server has been issued, the server can set a
 cookie in the Set-Cookie header and send the response back to the client. On
 the client's side the cookie is stored by the browser and sent with subsequent
 requests to the same domain via the Cookie \gls{HTTP} header. An example of a
 cookie header is given in Listing~\ref{lst:session cookie header}. Because this
 example does not set an expiration date for the cookie, it sets a session
 cookie. Session cookies are limited to the current session and are deleted as
 soon as the session is `torn down'. By adding an expiration date (demonstrated
 in Listing~\ref{lst:permanent cookie header}) or a maximum age, the cookie
 becomes permanent. Additionally, the domain attribute can be specified, meaning
 that cookies which list a different domain than the origin are rejected by the
 user agent \cite[Section 4.1.2.3]{barthHTTPStateManagement2011}. The same-origin
 policy applies to cookies, disallowing access by other domains.
 \begin{listing}
    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/session-cookie-header}
    \caption{An example of an \gls{HTTP} header setting a session cookie.}
    \label{lst:session cookie header}
 \end{listing}
 \begin{listing}
    \inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/permanent-cookie-header}
    \caption{An example of an \gls{HTTP} header setting a permanent cookie.}
    \label{lst:permanent cookie header}
 \end{listing}
 Distinguishing tracking and non-tracking cookies can be done with high accuracy
 by observing their expiration time and the length of the value field.
 \citeauthor{liTrackAdvisorTakingBack2015} \cite{liTrackAdvisorTakingBack2015}
 demonstrate a supervised learning approach to detecting tracking cookies with
 their tool \emph{TrackAdvisor}. They found that tracking cookies generally have
 a longer expiration time than non-tracking cookies and they need to have a
 sufficiently long value field carrying the unique identifier. Using this method,
 they found that only 10\% of tracking cookies have a lifetime of a single day or
 less while 80\% of non-tracking cookies expire before a day is over.
 Additionally, a length of more than 35 characters in the value field applies to
 80\% of tracking cookies and a value field of less than 35 characters applies to
 80\% of non-tracking cookies. \emph{Cookie Chunking}, where a cookie of larger
 length is split into multiple cookies with smaller length, did not appear to
 affect detection by their method negatively. They also present a site
 measurement of the Alexa Top 10,000 websites, finding that 46\% of websites use
 third party tracking. More recent research
 \cite{gonzalezCookieRecipeUntangling2017} has shown that tracking cookies do not
 have to be long lasting to accumulate data about users. Some cookies---like the
 \texttt{\_\_utma} cookie from Google Analytics for example---save a timestamp of
 the current visit with the unique identifier, thereby allowing the use of cookies
 which last a short time but can afterwards be used in series to complete the
 whole picture. \citeauthor{gonzalezCookieRecipeUntangling2017}
 \cite{gonzalezCookieRecipeUntangling2017} have also found 20\% of observed
 cookies to be \gls{URL} or base64 encoded, making decoding of cookies a
 necessary step for analysis. Furthermore---and contrary to previous work---
 cookie values are found in much more varieties than is assumed by approaches
 that only try to detect cookies by their expiration date and/or character
 length. They also presented an entity based matching algorithm to dissect
 cookies which contain more than a unique identifier. This allows for a better
 understanding and interpretation of complex cookies as they are found in
 advertising networks with a lot of reach (e.g., doubleclick.net). This
 information is particularly useful for building applications that effectively
 detect and block cookies (see chapter~\ref{chap:defences against tracking}).
 \subsection{Flash Cookies and Java JNLP PersistenceService}
 \label{subsec:flash cookies and java jnlp persistenceservice}
 Flash cookies are similar to \gls{HTTP} cookies in that they too are a store of
 information that helps websites and servers to recognize already seen users.
 They are referred to as \glspl{LSO} by Adobe and are part of the Adobe Flash
 Player runtime. Instead of storing data in the browser's storage, they have
 their own storage in a different location on the user's computer. Another
 difference is that they are not limited to 4 kilobytes of data but can store 100 kilobytes
 and they also have no expiration dates by default (\gls{HTTP} cookies live until
 the end of the session unless specified otherwise). Since Flash cookies are not
 created by means the browser normally supports (i.e., \gls{HTTP}, \gls{CSS})
 but by Adobe's Flash Player runtime, browsers are not managing Flash cookies.
 This means that, due to Flash cookies not being tied to a specific browser, they
 function across browsers. This capability makes them an interesting target for
 trackers to store their identifying information in, because out of the box
 browsers initially did not support removing Flash cookies and one had to
 manually set preferences in the \emph{Web Storage Settings panel} provided by
 the Flash Player runtime to get rid of them. Trackers were searching for a new
 way to store identifiers because users became increasingly aware of the dangers
 posed by \gls{HTTP} cookies and reacted by taking countermeasures.
 \citeauthor{soltaniFlashCookiesPrivacy2009}
 \cite{soltaniFlashCookiesPrivacy2009} were the first to report on the usage of
 Flash cookies by advertisers and popular websites. While surveying the top 100
 websites at the time, they found that 54\% of them used Flash cookies. Some
 websites were setting Flash cookies as well as \gls{HTTP} cookies with the same
 values, suggesting that Flash cookies serve as backup to \gls{HTTP} cookies.
 Several websites were found using Flash cookies to respawn already deleted
 \gls{HTTP} cookies, even across domains. \citeauthor{acarWebNeverForgets2014}
 \cite{acarWebNeverForgets2014} automated detecting Flash cookies and access to
 them by monitoring file access with the GNU/Linux \emph{strace} tool
 \cite{michaelStraceLinuxManual2020}. This allowed them to acquire data about
 Flash cookies respawning \gls{HTTP} cookies.  Their results show that six of the
 top 100 sites use Flash cookies for respawning.
 Even though Flash usage has declined during the last few years thanks to the
 development of the HTML5 standard, \citeauthor{buhovFLASH20thCentury2018}
 \cite{buhovFLASH20thCentury2018} have shown that despite major security flaws,
 Flash content is still served by 7.5\% of the top one million websites (2017).
 The W3Techs Web Technology Survey shows a similar trend and also offers an
 up-to-date measurement of 2.7\% of the top ten million websites for the year
 2020 \cite{w3techsHistoricalYearlyTrends2020}.  Due to the security concerns in
 using Flash, Google's popular video sharing platform YouTube switched by default
 to the HTML5 \texttt{<video>} tag in January of 2015
 \cite{youtubeengineeringYouTubeNowDefaults2015}. In 2017 Adobe announced that they
 will end-of-life Flash at the end of 2020, stopping updates and distribution
 \cite{adobecorporatecommunicationsFlashFutureInteractive2017}. Consequently,
 Chrome 76 and Firefox 69 disabled Flash by default and will drop support
 entirely in 2020.
 Similarly to Flash, Java also provides a way of storing data locally on the
 user's computer via the PersistenceService \gls{API}
 \cite{PersistenceServiceJNLPAPI2015}. It is used by the evercookie library
 (section~\ref{subsec:evercookie}) to store values for cookie respawning by
 injecting a Java applet into the \gls{DOM} of a page
 \cite{baumanEvercookieApplet2013}.
 \subsection{Evercookie}
 \label{subsec:evercookie}
 Evercookie is JavaScript code that can be embedded in websites and allows
 information to be stored permanently on the user's computer. When activated,
 information is not only stored in standard \gls{HTTP} cookies but also in
 various other places, providing redundancy where possible. A full list of
 locations used by Evercookie can be found on the project's GitHub page
 \cite{kamkarSamykEvercookie2020}. In case the user wants to get rid of all
 information stored by visiting a website that uses evercookies, every location
 has to be cleared because if one remains, all the other cookies are restored.
 The cookie deletion mechanisms that are provided by browsers by default do not
 clear all locations where evercookies are stored, which makes evercookie almost
 impossible to avoid. Evercookie is open source and quietly implementing or using
 evercookie is therefore not easy to do. Additionally, it is reported on the
 project's GitHub page that it might cause severe performance issues in browsers.
 Evercookie has been proposed and implemented by
 \citeauthor{kamkarEvercookieVirtuallyIrrevocable2010} in
 \cite{kamkarEvercookieVirtuallyIrrevocable2010}. Multiple surveys have tried to
 quantify the use of evercookie in the wild.
 \citeauthor{acarWebNeverForgets2014} provide a heuristic for detecting
 evercookies stored on the user's computer \cite{acarWebNeverForgets2014} and
 analyze evercookie usage in conjunction with cookie respawning.
 \subsection{Cookie Synchronization}
 \label{subsec:cookie synchronization}
 When trackers are using cookies to store unique identifiers to track users,
 every tracker assigns a different identifier to the same user, due to the
 same-origin policy disallowing interaction with other trackers. Because of this,
 sharing data between multiple trackers is difficult, since there are no easy
 ways to accurately match an accumulated profile history of one identifier to
 another. This problem has been solved by modern trackers by using a mechanism
 called Cookie Synchronization or Cookie Matching. This technique allows multiple
 trackers to open an information sharing channel between each other without
 necessarily having to know the website the user visits.
 % Diagram of the cookie synchronization flow described in the following text.
 \begin{figure}[ht]
    \centering
    \includegraphics[width=\textwidth]{cookiesyncing}
    \caption{Cookie Synchronization in practice between two trackers
    \emph{cloudflare.com} and \emph{google.com}.}
    % \label has to follow \caption: \caption steps the figure counter, so a
    % label placed before it picks up the number of the enclosing subsection.
    \label{fig:cookie synchronization}
 \end{figure}
 An example of how Cookie Synchronization works in practice is given in
 Figure~\ref{fig:cookie synchronization}. The two parties that are interested in
 tracking the user are called \emph{cloudflare.com} and \emph{google.com} in this
 example. The user they want to track is called \emph{browser}. \emph{Browser}
 first visits \emph{website1.com} which loads JavaScript from
 \emph{cloudflare.com}. \emph{Cloudflare.com} sets a cookie in the browser with a
 tracking identifier called \emph{userID = 1234}. Next, \emph{browser} visits
 another website called \emph{website2.com} which loads an advertisement banner
 from \emph{google.com}. \emph{Google.com} also sets a cookie with the tracking
 identifier \emph{userID = ABCD}. \emph{Browser} now has two cookies from two
 different providers, each of them knowing the user under a different identifier.
 When \emph{browser} visits a third website called \emph{website3.com} which
 makes a request to \emph{cloudflare.com}, \emph{cloudflare.com} recognizes the
 user by the identifier \emph{userID = 1234} and sends an \gls{HTTP} redirect
 which points \emph{browser} to \emph{google.com}. The redirect also
 contains an \gls{HTTP} Query String (see section~\ref{subsec:passing information
 in urls}) which adds a query like \emph{?userID=1234\&publisher=website3.com}.
 The complete GET request to \emph{google.com} might look like this:
 \begin{minted}[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}
 GET /index.html?userID=1234&publisher=website3.com HTTP/1.1
 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0
 Host: google.com
 Cookie: userID=ABCD
 \end{minted}
 \emph{Google.com} therefore not only knows that the user with the identifier
 \emph{userID=ABCD} visited \emph{website3.com} but also that \emph{browser} is
 the same user as \emph{userID=1234}. Since the identifiers can now be traced
 back to the same person, the different cookies have been synchronized, allowing
 the two trackers to exchange information about the user without him or her
 knowing.
 Cookie Synchronization has seen widespread adoption especially in \gls{RTB}
 based auctions \cite{olejnikSellingPrivacyAuction2014}.
 \citeauthor{papadopoulosCookieSynchronizationEverything2019}
 \cite{papadopoulosCookieSynchronizationEverything2019} recorded and analyzed the
 browsing habits of 850 users over a time period of one year and found that 97\%
 of users with regular browsing activity were exposed to Cookie Synchronization
 at least once. Furthermore, they found that ``[...] the average user receives
 around 1 synchronization per 68 requests''
 \cite[p.~7]{papadopoulosCookieSynchronizationEverything2019}. In
 \cite{englehardtOnlineTracking1MillionSite2016} the authors crawl the top
 100,000 sites and find that 45 of the top 50 (90\%) third parties and 460 of the
 top 1000 (46\%) use Cookie Synchronization with at least one other party, with
 \emph{doubleclick.net} being at the top, sharing 108 cookies with 118 other third
 parties. \citeauthor{papadopoulosExclusiveHowSynced2018} show in
 \cite{papadopoulosExclusiveHowSynced2018} the threat that Cookie Synchronization
 poses to encrypted \gls{TLS} sessions by performing the cookie-syncing over
 unencrypted \gls{HTTP} even though the original request to the website was
 encrypted. This highlights the serious privacy implications for users of
 \gls{VPN} services trying to safeguard their traffic from a potentially
 malicious \gls{ISP}.
 \subsection{Silverlight Isolated Storage}
 \label{subsec:silverlight isolated storage}
 Silverlight Isolated Storage can also be used for storing data for tracking
 purposes on the user's computer. It has been compared to Adobe's Flash
 technology as it too requires a plugin from Microsoft to function. Available for
 storage are 100 kilobytes which is the same amount Flash cookies can store.
 Silverlight does not work in the private browsing mode and can only be cleaned
 manually by deleting a hidden directory in the filesystem or by changing
 settings in the Silverlight application. Silverlight's Isolated Storage is one
 of the methods evercookie (section~\ref{subsec:evercookie}) uses to make
 permanent deletion of cookies hard to do and to facilitate cookie respawning.
 Usage of Silverlight has seen a steady decline since 2011 even though it has
 been used by popular video streaming websites such as Netflix
 \cite{NetflixBeginsRollOut2010} and Amazon. Microsoft did not include
 Silverlight support in Windows 8 and declared end-of-life in a blog post for
 October of 2021 \cite{SilverlightEndSupport2015}. Usage of Silverlight currently
 hovers around 0.04\% for the top 10 million websites
 \cite{w3techsUsageStatisticsSilverlight2020}.
 \subsection{HTML5 Web Storage}
 \label{subsec:html5 web storage}
 HTML5 Web Storage comes in three different forms: HTML5 Global Storage, HTML5
 Local Storage and HTML5 Session Storage. It is part of the HTML specification
 \cite{whatwgHTMLStandard2020} and provides means for storing name-value pairs on
 the user's computer. HTML5 Web Storage works similarly to cookies but enables
 developers to manage transactions that are done by the user simultaneously but
 in two different windows. Whereas with cookies the transaction can accidentally
 be recorded twice, HTML5 Web Storage allows multiple windows to access the same
 storage on the user's computer thereby avoiding this problem. In contrast to
 cookies, which are sent to the server every time a request is made, HTML5 Storage
 contents do not get sent to the web server. By default the storage limit is
 configured to be 5 megabytes per origin \cite{whatwgHTMLStandard2020a}. Even
 though this was only a recommendation by the standard, all modern browsers
 adhere to it. More space can be allocated upon asking the user for permission to
 do so.
 Global Storage was part of an initial HTML5 draft and is accessible across
 applications. Due to it violating the same-origin policy, most major browsers
 have not implemented Global Storage.
 Local Storage does, however, obey the same-origin policy by only allowing the
 originating domain access to its name-value pairs. Every website has its own
 separate storage area, which maintains a clear separation of concerns.
 \subsection{HTML5 Indexed Database API}
 \label{subsec:html5 indexed database api}
 \subsection{Web SQL Database}
 \label{subsec:web sql database}
 \section{Cache-based Tracking Methods}
 \label{sec:cache-based tracking methods}
 \subsection{DNS Cache}
 \label{subsec:dns cache}
 \subsection{Browser Cache}
 \label{subsec:browser cache}
 \end{document}
--- a/code/hidden-web-form.html
+++ b/code/hidden-web-form.html
@ -0,0 +1,16 @@
 <form>
  <div>
    <label for="title">Post title:</label>
    <input type="text" id="title" name="title" value="blog post">
  </div>
  <div>
    <label for="content">Post content:</label>
    <textarea id="content" name="content" cols="60" rows="5">
      Welcome to my blog post!
    </textarea>
  </div>
  <div>
    <button type="submit">Update post</button>
  </div>
  <input type="hidden" id="userId" name="userId" value="5239asbd923fade923da">
 </form>
--- a/code/permanent-cookie-header
+++ b/code/permanent-cookie-header
@ -0,0 +1,5 @@
 HTTP/2.0 200 OK
 Content-type: text/html
 Set-Cookie: cookie1=value1; Expires=Thu, 27 Feb 2020 08:56:00 GMT
 [page content]
--- a/code/session-cookie-header
+++ b/code/session-cookie-header
@ -0,0 +1,5 @@
 HTTP/2.0 200 OK
 Content-type: text/html
 Set-Cookie: cookie1=value1
 [page content]
--- a/figures/cookiesyncing.pdf
+++ b/figures/cookiesyncing.pdf
--- a/main.tex
+++ b/main.tex
@ -0,0 +1,126 @@
 \documentclass[a4paper,12pt,oneside]{scrreport}
 \usepackage[utf8]{inputenc}
 \usepackage[T1]{fontenc}
 \usepackage[scaled]{helvet}
 \usepackage{times}
 \usepackage{subfiles}
 \usepackage[english]{babel}
 \usepackage[includeheadfoot,left=3.4cm,right=2.4cm,bottom=1.5cm,top=1.7cm]{geometry}
 \usepackage{graphicx}
 \usepackage{microtype}
 \usepackage{setspace}
 \usepackage{fancyhdr}
 \usepackage[hidelinks]{hyperref}
 \usepackage{xcolor}
 \usepackage{minted}
 \usepackage{listings}
 \usepackage{csquotes}
 \usepackage{xr}
 \usepackage[acronym]{glossaries}
 \usepackage{lastpage}
 % Make glossary/acronym references clickable links (glossaries + hyperref).
 \glsenablehyper
 % Width of margin paragraphs; set before todonotes is loaded
 % (NOTE(review): presumably sized for its margin notes -- confirm).
 \setlength{\marginparwidth}{2cm}
 % Paragraphs: no first-line indent, separated by vertical space instead.
 \setlength{\parindent}{0pt}
 \setlength{\parskip}{0.5em}
 \usepackage{todonotes}
 % Page style "frontmatter", applied via \thispagestyle in the document body:
 % empty header, page number centred in the footer, no separator rules.
 \fancypagestyle{frontmatter}{%
    \fancyhf{}
    \renewcommand{\footrulewidth}{0pt}
    \renewcommand{\headrulewidth}{0pt}
    \fancyfoot[C]{\thepage}
 }
 % Background colour for code listings (used as bgcolor of the minted blocks).
 \definecolor{light-gray}{gray}{0.95}
 % Vertical space before and after chapter headings.
 \RedeclareSectionCommand[beforeskip=0.5cm,afterskip=1.5cm]{chapter}
 % Headings: \normalfont resets the font, then sans-serif in decreasing sizes.
 \addtokomafont{chapter}{\normalfont\sffamily\huge}
 \addtokomafont{section}{\normalfont\sffamily\Large}
 \addtokomafont{subsection}{\normalfont\sffamily\large}
 % Bibliography: biblatex with the biber backend, IEEE style, ISO-formatted dates.
 \usepackage[backend=biber,style=ieee,urldate=iso,date=iso,seconds=true]{biblatex}
 \addbibresource{bibliography/references.bib}
 % Link appearance and PDF metadata (hyperref). colorlinks=true colours the
 % link text instead of drawing boxes; since every link colour is black, links
 % are typeset like ordinary text.
 \hypersetup{
    linkcolor=black,
    urlcolor=black,
    citecolor=black,
    breaklinks=true,
    colorlinks=true,
    frenchlinks=true,
    linktoc = all,
    pdftitle = {Stateful Web Tracking: Techniques and Countermeasures},
    pdfauthor = {Tobias Eidelpes}
 }
 % Default page style: running heads plus a footer with title and page count.
 \pagestyle{fancy}
 % Left mark: "<chapter name> <number>. <title>".
 \renewcommand{\chaptermark}[1]{\markboth{\chaptername\ \thechapter.\ #1}{}}
 % Right mark: "<chapter>.<section>. <title>".
 \renewcommand{\sectionmark}[1]{\markright{\arabic{chapter}.\arabic{section}.\ #1}}
 % Separator rules are drawn at 0.4pt; a width of 0pt would suppress them.
 \renewcommand	{\headrulewidth}{0.4pt} % visible rule below the header
 \renewcommand	{\footrulewidth}{0.4pt} % visible rule above the footer
 % Header: chapter mark on the left, section mark on the right.
 \fancyhead{}
 \fancyhead[L]{\leftmark}
 \fancyhead[R]{\rightmark}
 % Footer: thesis title on the left, "<page> / <last page>" on the right.
 \fancyfoot{}
 \fancyfoot[L]{Stateful Web Tracking: Techniques and Countermeasures}
 \fancyfoot[R]{\thepage \ / \pageref{LastPage}}
 % Redefine "plain" with an empty body.
 % NOTE(review): presumably so pages that switch to plain (e.g. chapter
 % openings) keep the fancy header/footer -- confirm against fancyhdr docs.
 \fancypagestyle{plain}{}
 % Search for graphics in figures/ and ../figures/.
 % NOTE(review): the second path presumably serves chapters compiled on their
 % own from chapters/ via subfiles -- confirm.
 \graphicspath{{figures/}{../figures/}}
 % Line spacing factor (setspace).
 \setstretch{1.1}
 % Enable glossary/acronym list generation; pairs with \printglossaries below.
 \makeglossaries
 \begin{document}
    % Acronym definitions (\newacronym entries).
    \input{abbrev/acronym.tex}
    % Front matter uses roman page numbers.
    \pagenumbering{roman}
    % \thispagestyle only changes the page that is current when it is issued.
    % NOTE(review): for items longer than one page, earlier pages therefore keep
    % the default style -- confirm this is acceptable.
    \subfile{chapters/abstract-de}
    \thispagestyle{frontmatter}
    \subfile{chapters/abstract-en}
    \thispagestyle{frontmatter}
    \tableofcontents
    \thispagestyle{frontmatter}
    \listoffigures
    \thispagestyle{frontmatter}
    \listoflistings
    \thispagestyle{frontmatter}
    \printglossaries
    \thispagestyle{frontmatter}
    % NOTE(review): the introduction is included before the switch to arabic
    % numbering below -- confirm whether the switch should precede it.
    \subfile{chapters/introduction}
    \pagenumbering{arabic}
    % Main chapters.
    \subfile{chapters/methods}
    \subfile{chapters/defences}
    \subfile{chapters/implications}
    \subfile{chapters/developments}
    \subfile{chapters/conclusion}
    \printbibliography
 \end{document}