Rework template
This commit is contained in:
commit
0b2d4d8c03
20
.gitignore
vendored
Normal file
20
.gitignore
vendored
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
_minted-main/
|
||||||
|
|
||||||
|
main.acn
|
||||||
|
main.aux
|
||||||
|
main.bbl
|
||||||
|
main.bcf
|
||||||
|
main.blg
|
||||||
|
main.fdb_latexmk
|
||||||
|
main.fls
|
||||||
|
main.glo
|
||||||
|
main.glsdefs
|
||||||
|
main.ist
|
||||||
|
main.lof
|
||||||
|
main.log
|
||||||
|
main.lol
|
||||||
|
main.out
|
||||||
|
main.pdf
|
||||||
|
main.run.xml
|
||||||
|
main.synctex.gz
|
||||||
|
main.toc
|
||||||
23
abbrev/acronym.tex
Normal file
23
abbrev/acronym.tex
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
\newacronym {INSO} {INSO} {Industrial Software}
|
||||||
|
\newacronym {TU} {TU} {Technische Universit\"at Wien}
|
||||||
|
\newacronym {ZID} {ZID} {Zentraler Informatikdienst}
|
||||||
|
\newacronym {KISS} {KISS} {Keep It Sober and Significant}
|
||||||
|
\newacronym {URL} {URL} {Uniform Resource Locator}
|
||||||
|
\newacronym {URI} {URI} {Uniform Resource Identifier}
|
||||||
|
\newacronym {WWW} {WWW} {World Wide Web}
|
||||||
|
\newacronym {HTTP} {HTTP} {Hypertext Transfer Protocol}
|
||||||
|
\newacronym {HTML} {HTML} {Hypertext Markup Language}
|
||||||
|
\newacronym {DOM} {DOM} {Document Object Model}
|
||||||
|
\newacronym {API} {API} {Application Programming Interface}
|
||||||
|
\newacronym {XHTML} {XHTML} {Extensible Hypertext Markup Language}
|
||||||
|
\newacronym {XML} {XML} {Extensible Markup Language}
|
||||||
|
\newacronym {W3C} {W3C} {World Wide Web Consortium}
|
||||||
|
\newacronym {WHATWG} {WHATWG} {Web Hypertext Application Technology Working Group}
|
||||||
|
\newacronym {JSON} {JSON} {JavaScript Object Notation}
|
||||||
|
\newacronym {PII} {PII} {Personally Identifiable Information}
|
||||||
|
\newacronym {LSO} {LSO} {Local Shared Object}
|
||||||
|
\newacronym {CSS} {CSS} {Cascading Style Sheets}
|
||||||
|
\newacronym {RTB} {RTB} {Real Time Bidding}
|
||||||
|
\newacronym {TLS} {TLS} {Transport Layer Security}
|
||||||
|
\newacronym {VPN} {VPN} {Virtual Private Network}
|
||||||
|
\newacronym {ISP} {ISP} {Internet Service Provider}
|
||||||
158
abbrev/acronym.tex.aux
Normal file
158
abbrev/acronym.tex.aux
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
\relax
|
||||||
|
\providecommand\hyper@newdestlabel[2]{}
|
||||||
|
\@setckpt{abbrev/acronym.tex}{
|
||||||
|
\setcounter{page}{1}
|
||||||
|
\setcounter{equation}{0}
|
||||||
|
\setcounter{enumi}{0}
|
||||||
|
\setcounter{enumii}{0}
|
||||||
|
\setcounter{enumiii}{0}
|
||||||
|
\setcounter{enumiv}{0}
|
||||||
|
\setcounter{footnote}{0}
|
||||||
|
\setcounter{mpfootnote}{0}
|
||||||
|
\setcounter{part}{0}
|
||||||
|
\setcounter{chapter}{0}
|
||||||
|
\setcounter{section}{0}
|
||||||
|
\setcounter{subsection}{0}
|
||||||
|
\setcounter{subsubsection}{0}
|
||||||
|
\setcounter{paragraph}{0}
|
||||||
|
\setcounter{subparagraph}{0}
|
||||||
|
\setcounter{figure}{0}
|
||||||
|
\setcounter{table}{0}
|
||||||
|
\setcounter{parentequation}{0}
|
||||||
|
\setcounter{su@anzahl}{0}
|
||||||
|
\setcounter{LT@tables}{0}
|
||||||
|
\setcounter{LT@chunks}{0}
|
||||||
|
\setcounter{Item}{0}
|
||||||
|
\setcounter{Hfootnote}{0}
|
||||||
|
\setcounter{bookmark@seq@number}{0}
|
||||||
|
\setcounter{FancyVerbLine}{0}
|
||||||
|
\setcounter{linenumber}{1}
|
||||||
|
\setcounter{LN@truepage}{0}
|
||||||
|
\setcounter{FV@TrueTabGroupLevel}{0}
|
||||||
|
\setcounter{FV@TrueTabCounter}{0}
|
||||||
|
\setcounter{FV@HighlightLinesStart}{0}
|
||||||
|
\setcounter{FV@HighlightLinesStop}{0}
|
||||||
|
\setcounter{FancyVerbLineBreakLast}{0}
|
||||||
|
\setcounter{float@type}{16}
|
||||||
|
\setcounter{minted@FancyVerbLineTemp}{0}
|
||||||
|
\setcounter{minted@pygmentizecounter}{0}
|
||||||
|
\setcounter{listing}{0}
|
||||||
|
\setcounter{lstnumber}{1}
|
||||||
|
\setcounter{tabx@nest}{0}
|
||||||
|
\setcounter{listtotal}{0}
|
||||||
|
\setcounter{listcount}{0}
|
||||||
|
\setcounter{liststart}{0}
|
||||||
|
\setcounter{liststop}{0}
|
||||||
|
\setcounter{citecount}{0}
|
||||||
|
\setcounter{citetotal}{0}
|
||||||
|
\setcounter{multicitecount}{0}
|
||||||
|
\setcounter{multicitetotal}{0}
|
||||||
|
\setcounter{instcount}{0}
|
||||||
|
\setcounter{maxnames}{3}
|
||||||
|
\setcounter{minnames}{3}
|
||||||
|
\setcounter{maxitems}{3}
|
||||||
|
\setcounter{minitems}{1}
|
||||||
|
\setcounter{citecounter}{0}
|
||||||
|
\setcounter{maxcitecounter}{0}
|
||||||
|
\setcounter{savedcitecounter}{0}
|
||||||
|
\setcounter{uniquelist}{0}
|
||||||
|
\setcounter{uniquename}{0}
|
||||||
|
\setcounter{refsection}{0}
|
||||||
|
\setcounter{refsegment}{0}
|
||||||
|
\setcounter{maxextratitle}{0}
|
||||||
|
\setcounter{maxextratitleyear}{0}
|
||||||
|
\setcounter{maxextraname}{2}
|
||||||
|
\setcounter{maxextradate}{0}
|
||||||
|
\setcounter{maxextraalpha}{0}
|
||||||
|
\setcounter{abbrvpenalty}{50}
|
||||||
|
\setcounter{highnamepenalty}{50}
|
||||||
|
\setcounter{lownamepenalty}{25}
|
||||||
|
\setcounter{maxparens}{3}
|
||||||
|
\setcounter{parenlevel}{0}
|
||||||
|
\setcounter{mincomprange}{10}
|
||||||
|
\setcounter{maxcomprange}{100000}
|
||||||
|
\setcounter{mincompwidth}{1}
|
||||||
|
\setcounter{afterword}{0}
|
||||||
|
\setcounter{savedafterword}{0}
|
||||||
|
\setcounter{annotator}{0}
|
||||||
|
\setcounter{savedannotator}{0}
|
||||||
|
\setcounter{author}{0}
|
||||||
|
\setcounter{savedauthor}{0}
|
||||||
|
\setcounter{bookauthor}{0}
|
||||||
|
\setcounter{savedbookauthor}{0}
|
||||||
|
\setcounter{commentator}{0}
|
||||||
|
\setcounter{savedcommentator}{0}
|
||||||
|
\setcounter{editor}{0}
|
||||||
|
\setcounter{savededitor}{0}
|
||||||
|
\setcounter{editora}{0}
|
||||||
|
\setcounter{savededitora}{0}
|
||||||
|
\setcounter{editorb}{0}
|
||||||
|
\setcounter{savededitorb}{0}
|
||||||
|
\setcounter{editorc}{0}
|
||||||
|
\setcounter{savededitorc}{0}
|
||||||
|
\setcounter{foreword}{0}
|
||||||
|
\setcounter{savedforeword}{0}
|
||||||
|
\setcounter{holder}{0}
|
||||||
|
\setcounter{savedholder}{0}
|
||||||
|
\setcounter{introduction}{0}
|
||||||
|
\setcounter{savedintroduction}{0}
|
||||||
|
\setcounter{namea}{0}
|
||||||
|
\setcounter{savednamea}{0}
|
||||||
|
\setcounter{nameb}{0}
|
||||||
|
\setcounter{savednameb}{0}
|
||||||
|
\setcounter{namec}{0}
|
||||||
|
\setcounter{savednamec}{0}
|
||||||
|
\setcounter{translator}{0}
|
||||||
|
\setcounter{savedtranslator}{0}
|
||||||
|
\setcounter{shortauthor}{0}
|
||||||
|
\setcounter{savedshortauthor}{0}
|
||||||
|
\setcounter{shorteditor}{0}
|
||||||
|
\setcounter{savedshorteditor}{0}
|
||||||
|
\setcounter{labelname}{0}
|
||||||
|
\setcounter{savedlabelname}{0}
|
||||||
|
\setcounter{institution}{0}
|
||||||
|
\setcounter{savedinstitution}{0}
|
||||||
|
\setcounter{lista}{0}
|
||||||
|
\setcounter{savedlista}{0}
|
||||||
|
\setcounter{listb}{0}
|
||||||
|
\setcounter{savedlistb}{0}
|
||||||
|
\setcounter{listc}{0}
|
||||||
|
\setcounter{savedlistc}{0}
|
||||||
|
\setcounter{listd}{0}
|
||||||
|
\setcounter{savedlistd}{0}
|
||||||
|
\setcounter{liste}{0}
|
||||||
|
\setcounter{savedliste}{0}
|
||||||
|
\setcounter{listf}{0}
|
||||||
|
\setcounter{savedlistf}{0}
|
||||||
|
\setcounter{location}{0}
|
||||||
|
\setcounter{savedlocation}{0}
|
||||||
|
\setcounter{organization}{0}
|
||||||
|
\setcounter{savedorganization}{0}
|
||||||
|
\setcounter{origlocation}{0}
|
||||||
|
\setcounter{savedoriglocation}{0}
|
||||||
|
\setcounter{origpublisher}{0}
|
||||||
|
\setcounter{savedorigpublisher}{0}
|
||||||
|
\setcounter{publisher}{0}
|
||||||
|
\setcounter{savedpublisher}{0}
|
||||||
|
\setcounter{language}{0}
|
||||||
|
\setcounter{savedlanguage}{0}
|
||||||
|
\setcounter{origlanguage}{0}
|
||||||
|
\setcounter{savedoriglanguage}{0}
|
||||||
|
\setcounter{pageref}{0}
|
||||||
|
\setcounter{savedpageref}{0}
|
||||||
|
\setcounter{textcitecount}{0}
|
||||||
|
\setcounter{textcitetotal}{0}
|
||||||
|
\setcounter{textcitemaxnames}{0}
|
||||||
|
\setcounter{biburlbigbreakpenalty}{100}
|
||||||
|
\setcounter{biburlbreakpenalty}{200}
|
||||||
|
\setcounter{biburlnumpenalty}{0}
|
||||||
|
\setcounter{biburlucpenalty}{0}
|
||||||
|
\setcounter{biburllcpenalty}{0}
|
||||||
|
\setcounter{smartand}{1}
|
||||||
|
\setcounter{bbx:relatedcount}{0}
|
||||||
|
\setcounter{bbx:relatedtotal}{0}
|
||||||
|
\setcounter{cbx@tempcnta}{0}
|
||||||
|
\setcounter{cbx@tempcntb}{0}
|
||||||
|
\setcounter{section@level}{0}
|
||||||
|
\setcounter{lstlisting}{0}
|
||||||
|
}
|
||||||
1315
bibliography/references.bib
Normal file
1315
bibliography/references.bib
Normal file
File diff suppressed because it is too large
Load Diff
35
chapters/abstract-de.tex
Normal file
35
chapters/abstract-de.tex
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter*{Kurzfassung}
|
||||||
|
|
||||||
|
\emph{Über diese Vorlage:}
|
||||||
|
Dieses Template dient als Vorlage für die Erstellung einer wissenschaftlichen
|
||||||
|
Arbeit am INSO. Individuelle Erweiterungen, Strukturanpassungen und
|
||||||
|
Layout-Veränderungen können und sollen selbstverständlich nach persönlichem
|
||||||
|
Ermessen und in Rücksprache mit Ihrem Betreuer vorgenommen werden.
|
||||||
|
|
||||||
|
\emph{Aufbau}:
|
||||||
|
In der Kurzfassung werden auf einer 3/4 bis maximal einer Seite die Kernaussagen
|
||||||
|
der Diplomarbeit zusammengefasst. Dabei sollte zunächst die Motivation/der
|
||||||
|
Kontext der vorliegenden Arbeit dargestellt werden, und dann kurz die
|
||||||
|
Frage-/Problemstellung erläutert werden, max. 1 Absatz! Im nächsten Absatz auf
|
||||||
|
die Methode/Verfahrensweise/das konkrete Fallbeispiel eingehen, mit deren Hilfe
|
||||||
|
die Ergebnisse erzielt wurden. Im Zentrum der Kurzfassung stehen die zentralen
|
||||||
|
eigenen Ergebnisse der Arbeit, die den Wert der vorliegenden wissenschaftlichen
|
||||||
|
Arbeit ausmachen. Hier auch, wenn vorhanden, eigene Publikationen erwähnen.
|
||||||
|
|
||||||
|
\emph{Wichtig: Verständlichkeit!}
|
||||||
|
Die Kurzfassung soll für Leser verständlich sein, denen das Gebiet der
|
||||||
|
Arbeit fremd ist. Deshalb Abkürzungen immer zuerst ausschreiben, in Klammer
|
||||||
|
dazu die Erklärung: z.\,B.: \enquote{Im Rahmen der vorliegenden Arbeit werden
|
||||||
|
Non Governmental-Organisationen (NGOs) behandelt, \ldots}. In \LaTeX{} wird
|
||||||
|
dies bereits automatisch durch Verwenden des Befehls \verb|\ac| erreicht.
|
||||||
|
Für Details siehe Paket \texttt{glossaries}.
|
||||||
|
|
||||||
|
\bigskip
|
||||||
|
|
||||||
|
\section*{Schlüsselwörter}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
29
chapters/abstract-en.tex
Normal file
29
chapters/abstract-en.tex
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter*{Abstract}
|
||||||
|
|
||||||
|
\emph{About this template}:
|
||||||
|
This template helps with writing a scientific document at INSO. Users of this
|
||||||
|
template are welcome to make individual modifications, extensions, and changes
|
||||||
|
to layout and typography in consultation with their advisor.
|
||||||
|
|
||||||
|
\emph{Writing an abstract}: The abstract summarizes the most important
|
||||||
|
information within less than one page. Within the first paragraph, present the
|
||||||
|
motivation and context for your work, followed by the specific aims. In the next
|
||||||
|
paragraph, describe your methodology / approach, and / or the specific case you
|
||||||
|
are working on. The third paragraph describes the results and the contribution
|
||||||
|
of your work.
|
||||||
|
|
||||||
|
\emph{Comprehensibility}: People with different backgrounds who are new to
|
||||||
|
your area of work should be able to understand the abstract. Therefore, acronyms
|
||||||
|
should only be used after their full definition has been given. E.g., ``This work
|
||||||
|
relates to non-governmental organizations (NGOs), \ldots''.
|
||||||
|
|
||||||
|
\bigskip
|
||||||
|
|
||||||
|
\section*{Keywords}
|
||||||
|
%Keyword, important, SubjectOfMyPaper, FieldOfWork.
|
||||||
|
|
||||||
|
\end{document}
|
||||||
7
chapters/conclusion.tex
Normal file
7
chapters/conclusion.tex
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Conclusion}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
8
chapters/defences.tex
Normal file
8
chapters/defences.tex
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Defences against Tracking}
|
||||||
|
\label{chap:defences against tracking}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
7
chapters/developments.tex
Normal file
7
chapters/developments.tex
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Future Tracking Ecosystem Developments}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
7
chapters/implications.tex
Normal file
7
chapters/implications.tex
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Implications of Tracking}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
16
chapters/introduction.tex
Normal file
16
chapters/introduction.tex
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Introduction}
|
||||||
|
|
||||||
|
\section{Terms and Scope}
|
||||||
|
\label{sec:Terms and Scope}
|
||||||
|
|
||||||
|
\section{Background and Related Work}
|
||||||
|
\label{sec:Background and Related Work}
|
||||||
|
|
||||||
|
\section{Structure of the Thesis}
|
||||||
|
\label{sec:Structure of the Thesis}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
551
chapters/methods.tex
Normal file
551
chapters/methods.tex
Normal file
@ -0,0 +1,551 @@
|
|||||||
|
\documentclass[../main.tex]{subfiles}
|
||||||
|
\externaldocument{defences}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\chapter{Tracking Methods}
|
||||||
|
\label{chap:tracking methods}
|
||||||
|
|
||||||
|
This chapter will go into detail about various tracking methods that have been
|
||||||
|
used during the history of the web. It is important to note that some of those
|
||||||
|
approaches to tracking date back to when the World Wide Web was still in its
|
||||||
|
early development stages. Knowing where the techniques come from helps in
|
||||||
|
correctly judging the impact they had and still have on the Internet as we use
|
||||||
|
it today. Furthermore, knowledge about the past allows for better predictions of
|
||||||
|
future changes in the tracking ecosystem.
|
||||||
|
|
||||||
|
To aid in understanding how they work and where they fit in the tracking
|
||||||
|
landscape, three different categories are identified and presented:
|
||||||
|
session-based, storage-based and cache-based tracking methods. Each category
|
||||||
|
uses different mechanisms and technologies to enable tracking of users. What
|
||||||
|
most of them have in common is that they try to place unique identifiers in
|
||||||
|
different places, which can then be read on subsequent visits. Thus, a
|
||||||
|
chronological ordering of events enables interested parties to infer not only
|
||||||
|
usage statistics but also specific data about the entities behind those
|
||||||
|
identifiers.
|
||||||
|
|
||||||
|
\section{Session-based Tracking Methods}
|
||||||
|
\label{sec:session-based tracking methods}
|
||||||
|
|
||||||
|
One of the simplest and most used forms of tracking on the Internet relies on
|
||||||
|
sessions. Since HTTP is a stateless protocol, web servers cannot by default keep
|
||||||
|
track of any previous client requests. In order to implement specific features
|
||||||
|
such as personalized advertising, some means to save current and recall previous
|
||||||
|
states must be used. For this functionality, sessions were introduced. Sessions
|
||||||
|
represent a temporary and interactive exchange of information between two
|
||||||
|
parties. Due to their temporary nature, they have to be `brought up' at some
|
||||||
|
point and `torn down' at a later point in time. It is not specified however,
|
||||||
|
how long the period between establishing and stopping a session has to be. It
|
||||||
|
could be only for a single browser session and terminated by the user manually,
|
||||||
|
or it could be for as long as a year.
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Passing Information in URLs}
|
||||||
|
\label{subsec:passing information in urls}
|
||||||
|
|
||||||
|
\glspl{URL} were first proposed by Berners-Lee in 1994
|
||||||
|
\cite{berners-leeUniformResourceLocators1994} and are based on \glspl{URI}
|
||||||
|
\cite{berners-leeUniversalResourceIdentifiers1994}. The latter specifies a way
|
||||||
|
to uniquely identify a particular resource. The former extends the \gls{URI}
|
||||||
|
specification to include where and how a particular resource can be found.
|
||||||
|
\glspl{URI} consist of multiple parts:
|
||||||
|
|
||||||
|
\begin{enumerate}
|
||||||
|
\item a scheme (in some cases a specific protocol),
|
||||||
|
\item an optional authority (network host or domain name),
|
||||||
|
\item a path (a specific location on that host),
|
||||||
|
\item an optional query and
|
||||||
|
\item an optional fragment preceded by a hash sign (a sub resource pointing to
|
||||||
|
a specific location within the resource)
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
|
To access a section called \texttt{introduction} in a blog post named
|
||||||
|
\texttt{blogpost} on a host with the domain name \texttt{example.com} over the
|
||||||
|
\gls{HTTP}, a user might use the following \gls{URI}:
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
http://example.com/blogpost/#introduction
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
Even though \glspl{URI} and \glspl{URL} are two different things, they are
|
||||||
|
mostly used interchangeably today. Especially non-technical people refer to an
|
||||||
|
address on the \gls{WWW} simply as a \gls{URL}.
|
||||||
|
|
||||||
|
The optional query parameter is in most cases constructed of multiple
|
||||||
|
\texttt{(key,value)} pairs, separated by delimiters such as \texttt{\&} and
|
||||||
|
\texttt{;}. In the tracking context, query parameters can be used to pass
|
||||||
|
information (e.g., unique identifiers) to the resource that is to be accessed by
|
||||||
|
appending a unique string to all the links within the downloaded page. Since
|
||||||
|
requests to pages are generally logged by the server, requesting multiple pages
|
||||||
|
with the same unique identifier leaves a trail behind that can be used to
|
||||||
|
compile a browsing history. Sharing information with other parties is not
|
||||||
|
limited to unique identifiers. \gls{URL} parameters can also be used to pass the
|
||||||
|
referrer of a web page containing a query that has been submitted by the user.
|
||||||
|
\citeauthor{falahrastegarTrackingPersonalIdentifiers2016} demonstrate such an
|
||||||
|
example where an advertisement tracker logs a user's browsing history by storing
|
||||||
|
the referrer into a \texttt{(key,value)} pair
|
||||||
|
\cite[p.~37]{falahrastegarTrackingPersonalIdentifiers2016}. Other possibilities
|
||||||
|
include encoding geographical data, network properties, user information (e.g.,
|
||||||
|
e-mails) and authentication credentials.
|
||||||
|
\citeauthor{westMeasuringPrivacyDisclosures2014} conducted a survey concerning
|
||||||
|
the use of \gls{URL} Query Strings and found it to be in widespread use on the
|
||||||
|
web \cite{westMeasuringPrivacyDisclosures2014}.
|
||||||
|
|
||||||
|
\subsection{Hidden Form Fields}
|
||||||
|
\label{subsec:hidden form fields}
|
||||||
|
|
||||||
|
The \gls{HTML} provides a specification for form elements, which allow users to
|
||||||
|
submit information (e.g., for authentication) to the server via POST or GET
|
||||||
|
methods. Normally, a user would input data into a form and on clicking
|
||||||
|
\emph{submit} the input would be sent to the server. Sometimes it is necessary
|
||||||
|
to include additional information that the user did not enter. For this reason
|
||||||
|
there exist \emph{hidden} web forms. Hidden web forms do not show on the website
|
||||||
|
and therefore the user cannot enter any information. Similar to \gls{URL}
|
||||||
|
parameters, the value parameter in a hidden field contains additional
|
||||||
|
information like the user's preferred language for example. Since almost
|
||||||
|
anything can be sent in a value parameter, hidden form fields present another
|
||||||
|
way to maintain a session. A parameter containing a unique identifier will be
|
||||||
|
sent with the data the user has submitted to the server. The server can then
|
||||||
|
match the action the user took with the identifier. In case the server already
|
||||||
|
knows that specific identifier from a previous interaction with the user, the
|
||||||
|
gained information can now be added to the user's browsing profile. An example
|
||||||
|
of a hidden web form is given in Listing~\ref{lst:hidden web form}, which has
|
||||||
|
been adapted from \cite{InputFormInput}. In Line 15 a hidden web field is
|
||||||
|
created and the \texttt{value} field is set by the server to contain a unique
|
||||||
|
user identifier. Once the \emph{submit} button has been clicked, the identifier
|
||||||
|
is sent to the server along with the data the user has filled in.
|
||||||
|
|
||||||
|
\begin{listing}
|
||||||
|
\inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{html}{code/hidden-web-form.html}
|
||||||
|
\caption{Example of an \gls{HTML} form containing a hidden field with
|
||||||
|
\texttt{id=userId}. The id is set by the web server dynamically so that every
|
||||||
|
visitor has his/her unique identifier attached to the form.}
|
||||||
|
\label{lst:hidden web form}
|
||||||
|
\end{listing}
|
||||||
|
|
||||||
|
\subsection{HTTP Referer}
|
||||||
|
\label{subsec:http referer}
|
||||||
|
|
||||||
|
Providers of web services often want to know where visitors to their website
|
||||||
|
come from to understand more about their users and their browsing habits. The
|
||||||
|
\gls{HTTP} specification accounts for this by introducing the \emph{\gls{HTTP}
|
||||||
|
Referer field} [\emph{sic}] in the header. By checking the referrer, the server
|
||||||
|
can see where the request came from. In practice, a user clicks on a link on a
|
||||||
|
web page and the current web page is sent as a \gls{URL} in the \gls{HTTP}
|
||||||
|
Referer field. The header with the referrer information gets attached to the
|
||||||
|
\gls{HTTP} request which is sent to the server. The server responds with the
|
||||||
|
requested web page and can establish a link from the original web page to the
|
||||||
|
new web page. When applied to a majority of the requests on a site, the
|
||||||
|
resulting data can be analyzed for promotional and statistical purposes.
|
||||||
|
\citeauthor{malandrinoPrivacyAwarenessInformation2013} have shown that the
|
||||||
|
\gls{HTTP} Referer is one of the most critical factors in leaking \gls{PII}
|
||||||
|
\cite{malandrinoPrivacyAwarenessInformation2013}, because leakage of information
|
||||||
|
relating to users' health has been identified as the most severe in terms of
|
||||||
|
identifiability of users on the web.
|
||||||
|
|
||||||
|
\subsection{Explicit Authentication}
|
||||||
|
\label{subsec:explicit authentication}
|
||||||
|
|
||||||
|
Explicit authentication requires a user to \emph{explicitly} log in or register
|
||||||
|
to the website. This way, specific resources are only available to the user when
|
||||||
|
they have authenticated themselves to the service. Actions taken on an
|
||||||
|
authenticated user account are tied to that account and crafting a personal
|
||||||
|
profile is more or less a built-in function in this case. Since merely asking a
|
||||||
|
user to authenticate is a simple method, the extent to which it can be used is
|
||||||
|
limited. Logged in users are generally not logged in across different browser
|
||||||
|
sessions, unless they are using cookies to do so (see section~\ref{subsec:http
|
||||||
|
cookies}), therefore limiting tracking to one session at a time. Furthermore,
|
||||||
|
always requiring a logged in state can be a tiring task for users, because they
|
||||||
|
have to be authenticated every time they visit a particular service. This can
|
||||||
|
potentially pose a usability problem where users simply stop using the service
|
||||||
|
or go to considerable lengths to avoid logging in. This largely depends on a
|
||||||
|
cost-benefit analysis the users subconsciously undertake \todo{Citation needed}. The third
|
||||||
|
factor where this method is lacking, concerns the awareness of the user being
|
||||||
|
tracked. Since tracking users depends on them actively logging in to the
|
||||||
|
service, tracking them transparently is impossible. Even though most tracking
|
||||||
|
efforts are not detected by the average user \todo{Citation needed}, it is known that actions
|
||||||
|
taken on an account are logged to provide better service through service
|
||||||
|
optimization and profile personalization.
|
||||||
|
|
||||||
|
Making an account on a website to use their services to their full extent, can
|
||||||
|
be beneficial in some cases. Facebook for example, allows their users to
|
||||||
|
configure what they want to share with the public and their friends. Research
|
||||||
|
has shown however, that managing which posts get shown to whom is not as
|
||||||
|
straightforward as one might think.
|
||||||
|
\todo{Wrong chapter?} \citeauthor{liuAnalyzingFacebookPrivacy2011}
|
||||||
|
\cite{liuAnalyzingFacebookPrivacy2011} conducted a survey where they asked
|
||||||
|
Facebook users about their desired privacy and visibility settings and
|
||||||
|
cross-checked them with the actual settings they have used for their posts. The
|
||||||
|
results showed that in only 37\% of cases the users' expectations match the
|
||||||
|
reality. Additionally, 36\% of content is left on the default privacy settings
|
||||||
|
which set the visibility of posts to public, meaning that any Facebook user can
|
||||||
|
view them.
|
||||||
|
|
||||||
|
\subsection{window.name DOM Property}
|
||||||
|
\label{subsec:window.name dom property}
|
||||||
|
|
||||||
|
The \gls{DOM} is a platform and language agnostic \gls{API} which defines the
|
||||||
|
logical structure of web documents (i.e., \gls{HTML}, \gls{XHTML} and \gls{XML})
|
||||||
|
and the way they are accessed and manipulated. The \gls{DOM} was originally
|
||||||
|
introduced by Netscape at the same time as JavaScript as the \gls{DOM} Level 0.
|
||||||
|
The first recommendation (\gls{DOM} Level 1) was released in 1998 by the
|
||||||
|
\gls{W3C} \gls{DOM} working group \cite{w3cDocumentObjectModel1998} which
|
||||||
|
published its final recommendation (\gls{DOM} Level 3) in 2004. Since then the
|
||||||
|
\gls{WHATWG} took over and in 2015 published the \gls{DOM} Level 4 standard
|
||||||
|
\cite{whatwgDOMLivingStandard2020} which replaces the Level 3 specification. It
|
||||||
|
works by organizing all objects in a document in a tree structure which allows
|
||||||
|
individual parts to be altered when a specific event happens (e.g., user
|
||||||
|
interaction). Furthermore, each object has properties which are either applied to
|
||||||
|
all \gls{HTML} elements or only to a subset of all elements.
|
||||||
|
|
||||||
|
One useful property for tracking purposes is the \texttt{window.name} property.
|
||||||
|
Its original intention was to allow client-side JavaScript to get or set the
|
||||||
|
name of the current window. Since windows do not have to have names, the
|
||||||
|
\texttt{window.name} property is being used mostly for setting targets for hyperlinks and
|
||||||
|
forms. Modern browsers allow storing up to two megabytes of data in the
|
||||||
|
\texttt{window.name} property, which makes it a viable option for using it as a data
|
||||||
|
storage or---more specifically---maintaining session variables. In order to
|
||||||
|
store multiple variables in the \texttt{window.name} property, the values have first to
|
||||||
|
be packed in some way because only a single string is allowed. A \gls{JSON}
|
||||||
|
stringifier converts a normal string into a \gls{JSON} string which is then
|
||||||
|
ready to be stored in the \gls{DOM} property. Additionally, serializers can also
|
||||||
|
convert JavaScript objects into a \gls{JSON} string. Normally JavaScript's
|
||||||
|
same-origin policy prohibits making requests to servers in another domain, but
|
||||||
|
the \texttt{window.name} property is accessible from other domains and resistant to page
|
||||||
|
reloads. Maintaining a session across domains and without cookies is therefore
|
||||||
|
possible and multiple implementations exist
|
||||||
|
\cite{frankSessionVariablesCookies2008,zypWindowNameTransport2008}.
|
||||||
|
|
||||||
|
\section{Storage-based Tracking Methods}
|
||||||
|
\label{sec:storage-based tracking methods}
|
||||||
|
|
||||||
|
Storage-based tracking methods are different to session-based tracking methods
|
||||||
|
in that they try to store information on the client's computer not only for
|
||||||
|
single sessions but for as long as desired. The following methods can be used to
|
||||||
|
store session data as well but are not limited to that use case. They generally
|
||||||
|
enable more advanced tracking approaches because they have information about the
|
||||||
|
current browser instance and the operating system the browser is running on. Due
|
||||||
|
to their nature of residing on the user's computer, they are in most cases
|
||||||
|
harder to circumvent, especially when two or more methods are combined resulting
|
||||||
|
in better resilience against simple defences.
|
||||||
|
|
||||||
|
\subsection{HTTP Cookies}
|
||||||
|
\label{subsec:http cookies}
|
||||||
|
|
||||||
|
A method which is most often associated with tracking on the Internet is
|
||||||
|
tracking with \gls{HTTP} cookies. Cookies are small files that are placed in the
|
||||||
|
browser's storage on the user's computer. They are limited to four kilobytes in
|
||||||
|
size and are generally used to identify and authenticate users and to store
|
||||||
|
website preferences. They were introduced to the web to allow stateful
|
||||||
|
information to be stored because the \gls{HTTP} is a stateless protocol and
|
||||||
|
therefore does not have this capability. It is also a way of reducing the
|
||||||
|
server's load by not having to recompute states every time a user visits a
|
||||||
|
website. Shopping cart functionality for example can thus be implemented by
|
||||||
|
setting a cookie in the user's browser, saving the items which are currently
|
||||||
|
added to the shopping cart and giving the user the possibility to resume
|
||||||
|
shopping at a later point provided that they do not delete their cookies. With
|
||||||
|
the introduction of cookies, advertising companies could reidentify users by
|
||||||
|
placing unique identifiers in the browser and reading them on subsequent visits.
|
||||||
|
The first standard for cookies was published in 1997
|
||||||
|
\cite{kristolHTTPStateManagement1997} and has since been updated multiple times
|
||||||
|
\cite{kristolHTTPStateManagement2000,barthHTTPStateManagement2011}.
|
||||||
|
|
||||||
|
Cookies can be divided into two categories: first party cookies, which are
|
||||||
|
created by the domain the user has requested and third party cookies, which are
|
||||||
|
placed in the user's browser by other domains that are generally not under the
|
||||||
|
control of the first party. Whereas first party cookies are commonly not used
|
||||||
|
for tracking but for the aforementioned shopping cart functionality for example
|
||||||
|
or enabling e-commerce applications to function properly, third party cookies are
|
||||||
|
popular with data brokerage firms (e.g., Datalogix, Experian, Equifax), online
|
||||||
|
advertisers (e.g., DoubleClick) and---belonging to both of these categories in
|
||||||
|
some cases---social media platforms (e.g., Facebook). The distinction between
|
||||||
|
these two categories is not always clear, however. Google Analytics for example
|
||||||
|
is considered to be a third party but offers their analytics services by setting
|
||||||
|
a first party cookie in the user's browser in addition to loading JavaScript
|
||||||
|
snippets from their servers. Therefore, categorizing cookies into those that
|
||||||
|
serve third party web content and those that serve first party web content
|
||||||
|
presents a more adequate approach.
|
||||||
|
|
||||||
|
Cookies are set either by calling scripts that are embedded in a web page (e.g.,
|
||||||
|
Google's \texttt{analytics.js}) or by using the \gls{HTTP} Set-Cookie response
|
||||||
|
header. Once a request to a web server has been issued, the server can set a
|
||||||
|
cookie in the Set-Cookie header and send the response back to the client. On
|
||||||
|
the client's side the cookie is stored by the browser and sent with subsequent
|
||||||
|
requests to the same domain via the Cookie \gls{HTTP} header. An example of a
|
||||||
|
cookie header is given in Listing~\ref{lst:session cookie header}. Because this
|
||||||
|
example does not set an expiration date for the cookie, it sets a session
|
||||||
|
cookie. Session cookies are limited to the current session and are deleted as
|
||||||
|
soon as the session is `torn down'. By adding an expiration date (demonstrated
|
||||||
|
in Listing~\ref{lst:permanent cookie header}) or a maximum age, the cookie
|
||||||
|
becomes permanent. Additionally, the domain attribute can be specified, meaning
|
||||||
|
that cookies which list a different domain than the origin are rejected by the
|
||||||
|
user agent \cite[Section 4.1.2.3]{barthHTTPStateManagement2011}. The same-origin
|
||||||
|
policy applies to cookies, disallowing access by other domains.
|
||||||
|
|
||||||
|
\begin{listing}
|
||||||
|
\inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/session-cookie-header}
|
||||||
|
\caption{An example of an \gls{HTTP} header setting a session cookie.}
|
||||||
|
\label{lst:session cookie header}
|
||||||
|
\end{listing}
|
||||||
|
|
||||||
|
\begin{listing}
|
||||||
|
\inputminted[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}{code/permanent-cookie-header}
|
||||||
|
\caption{An example of an \gls{HTTP} header setting a permanent cookie.}
|
||||||
|
\label{lst:permanent cookie header}
|
||||||
|
\end{listing}
|
||||||
|
|
||||||
|
Distinguishing tracking and non-tracking cookies can be done with high accuracy
|
||||||
|
by observing their expiration time and the length of the value field.
|
||||||
|
\citeauthor{liTrackAdvisorTakingBack2015} \cite{liTrackAdvisorTakingBack2015}
|
||||||
|
demonstrate a supervised learning approach to detecting tracking cookies with
|
||||||
|
their tool \emph{TrackAdvisor}. They found that tracking cookies generally have
|
||||||
|
a longer expiration time than non-tracking cookies and they need to have a
|
||||||
|
sufficiently long value field carrying the unique identifier. Using this method,
|
||||||
|
they found that only 10\% of tracking cookies have a lifetime of a single day or
|
||||||
|
less while 80\% of non-tracking cookies expire before a day is over.
|
||||||
|
Additionally, a length of more than 35 characters in the value field applies to
|
||||||
|
80\% of tracking cookies and a value field of less than 35 characters applies to
|
||||||
|
80\% of non-tracking cookies. \emph{Cookie Chunking}, where a cookie of larger
|
||||||
|
length is split into multiple cookies with smaller length, did not appear to
|
||||||
|
affect detection by their method negatively. They also present a site
|
||||||
|
measurement of the Alexa Top 10,000 websites, finding that 46\% of websites use
|
||||||
|
third party tracking. More recent research
|
||||||
|
\cite{gonzalezCookieRecipeUntangling2017} has shown that tracking cookies do not
|
||||||
|
have to be long lasting to accumulate data about users. Some cookies---like the
|
||||||
|
\texttt{\_\_utma} cookie from Google Analytics for example---save a timestamp of
|
||||||
|
the current visit with the unique identifier, thereby allowing the use of cookies
|
||||||
|
which last a short time but can be afterwards used in series to complete the
|
||||||
|
whole picture. \citeauthor{gonzalezCookieRecipeUntangling2017}
|
||||||
|
\cite{gonzalezCookieRecipeUntangling2017} have also found 20\% of observed
|
||||||
|
cookies to be \gls{URL} or base64 encoded, making decoding of cookies a
|
||||||
|
necessary step for analysis. Furthermore---and contrary to previous work---
|
||||||
|
cookie values are found in many more varieties than is assumed by approaches
|
||||||
|
that only try to detect cookies by their expiration date and/or character
|
||||||
|
length. They also presented an entity based matching algorithm to dissect
|
||||||
|
cookies which contain more than a unique identifier. This allows for a better
|
||||||
|
understanding and interpretation of complex cookies as they are found in
|
||||||
|
advertising networks with a lot of reach (e.g., doubleclick.net). This
|
||||||
|
information is particularly useful for building applications that effectively
|
||||||
|
detect and block cookies (see chapter~\ref{chap:defences against tracking}).
|
||||||
|
|
||||||
|
\subsection{Flash Cookies and Java JNLP PersistenceService}
|
||||||
|
\label{subsec:flash cookies and java jnlp persistenceservice}
|
||||||
|
|
||||||
|
Flash cookies are similar to \gls{HTTP} cookies in that they too are a store of
|
||||||
|
information that helps websites and servers to recognize already seen users.
|
||||||
|
They are referred to as \glspl{LSO} by Adobe and are part of the Adobe Flash
|
||||||
|
Player runtime. Instead of storing data in the browser's storage, they have
|
||||||
|
their own storage in a different location on the user's computer. Another
|
||||||
|
difference is that they can store not just 4 kilobytes of data but 100 kilobytes
|
||||||
|
and they also have no expiration dates by default (\gls{HTTP} cookies live until
|
||||||
|
the end of the session unless specified otherwise). Since Flash cookies are not
|
||||||
|
created by means the browser normally supports (i.e., \gls{HTTP}, \gls{CSS})
|
||||||
|
but by Adobe's Flash Player runtime, browsers do not manage Flash cookies.
|
||||||
|
This means that, due to Flash cookies not being tied to a specific browser, they
|
||||||
|
function across browsers. This capability makes them an interesting target for
|
||||||
|
trackers to store their identifying information in, because out of the box
|
||||||
|
browsers initially did not support removing Flash cookies and one had to
|
||||||
|
manually set preferences in the \emph{Web Storage Settings panel} provided by
|
||||||
|
the Flash Player runtime to get rid of them. Trackers were searching for a new
|
||||||
|
way to store identifiers because users became increasingly aware of the dangers
|
||||||
|
posed by \gls{HTTP} cookies and reacted by taking countermeasures.
|
||||||
|
|
||||||
|
\citeauthor{soltaniFlashCookiesPrivacy2009}
|
||||||
|
\cite{soltaniFlashCookiesPrivacy2009} were the first to report on the usage of
|
||||||
|
Flash cookies by advertisers and popular websites. While surveying the top 100
|
||||||
|
websites at the time, they found that 54\% of them used Flash cookies. Some
|
||||||
|
websites were setting Flash cookies as well as \gls{HTTP} cookies with the same
|
||||||
|
values, suggesting that Flash cookies serve as backup to \gls{HTTP} cookies.
|
||||||
|
Several websites were found using Flash cookies to respawn already deleted
|
||||||
|
\gls{HTTP} cookies, even across domains. \citeauthor{acarWebNeverForgets2014}
|
||||||
|
\cite{acarWebNeverForgets2014} automated detecting Flash cookies and access to
|
||||||
|
them by monitoring file access with the GNU/Linux \emph{strace} tool
|
||||||
|
\cite{michaelStraceLinuxManual2020}. This allowed them to acquire data about
|
||||||
|
Flash cookies respawning \gls{HTTP} cookies. Their results show that six of the
|
||||||
|
top 100 sites use Flash cookies for respawning.
|
||||||
|
|
||||||
|
Even though Flash usage has declined during the last few years thanks to the
|
||||||
|
development of the HTML5 standard, \citeauthor{buhovFLASH20thCentury2018}
|
||||||
|
\cite{buhovFLASH20thCentury2018} have shown that despite major security flaws,
|
||||||
|
Flash content is still served by 7.5\% of the top one million websites (2017).
|
||||||
|
The W3Techs Web Technology Survey shows a similar trend and also offers an
|
||||||
|
up-to-date measurement of 2.7\% of the top ten million websites for the year
|
||||||
|
2020 \cite{w3techsHistoricalYearlyTrends2020}. Due to the security concerns in
|
||||||
|
using Flash, Google's popular video sharing platform YouTube switched by default
|
||||||
|
to the HTML5 \texttt{<video>} tag in January of 2015
|
||||||
|
\cite{youtubeengineeringYouTubeNowDefaults2015}. In 2017 Adobe announced that they
|
||||||
|
will end-of-life Flash at the end of 2020, stopping updates and distribution
|
||||||
|
\cite{adobecorporatecommunicationsFlashFutureInteractive2017}. Consequently,
|
||||||
|
Chrome 76 and Firefox 69 disabled Flash by default and will drop support
|
||||||
|
entirely in 2020.
|
||||||
|
|
||||||
|
Similarly to Flash, Java also provides a way of storing data locally on the
|
||||||
|
user's computer via the PersistenceService \gls{API}
|
||||||
|
\cite{PersistenceServiceJNLPAPI2015}. It is used by the evercookie library
|
||||||
|
(section~\ref{subsec:evercookie}) to store values for cookie respawning by
|
||||||
|
injecting a Java applet into the \gls{DOM} of a page
|
||||||
|
\cite{baumanEvercookieApplet2013}.
|
||||||
|
|
||||||
|
\subsection{Evercookie}
|
||||||
|
\label{subsec:evercookie}
|
||||||
|
|
||||||
|
Evercookie is JavaScript code that can be embedded in websites, allowing them to
|
||||||
|
permanently store information on the user's computer. When activated,
|
||||||
|
information is not only stored in standard \gls{HTTP} cookies but also in
|
||||||
|
various other places, providing redundancy where possible. A full list of
|
||||||
|
locations used by Evercookie can be found on the project's GitHub page
|
||||||
|
\cite{kamkarSamykEvercookie2020}. In case the user wants to get rid of all
|
||||||
|
information stored by visiting a website that uses evercookies, every location
|
||||||
|
has to be cleared because if one remains, all the other cookies are restored.
|
||||||
|
The cookie deletion mechanisms that are provided by browsers by default do not
|
||||||
|
clear all locations where evercookies are stored, which makes evercookie almost
|
||||||
|
impossible to avoid. Evercookie is open source and quietly implementing or using
|
||||||
|
evercookie is therefore not easy to do. Additionally, it is reported on the
|
||||||
|
project's GitHub page that it might cause severe performance issues in browsers.
|
||||||
|
|
||||||
|
Evercookie has been proposed and implemented by
|
||||||
|
\citeauthor{kamkarEvercookieVirtuallyIrrevocable2010} in
|
||||||
|
\cite{kamkarEvercookieVirtuallyIrrevocable2010}. Multiple surveys have tried to
|
||||||
|
quantify the use of evercookie in the wild.
|
||||||
|
\citeauthor{acarWebNeverForgets2014} provide a heuristic for detecting
|
||||||
|
evercookies stored on the user's computer \cite{acarWebNeverForgets2014} and
|
||||||
|
analyze evercookie usage in conjunction with cookie respawning.
|
||||||
|
|
||||||
|
\subsection{Cookie Synchronization}
|
||||||
|
\label{subsec:cookie synchronization}
|
||||||
|
|
||||||
|
When trackers are using cookies to store unique identifiers to track users,
|
||||||
|
every tracker assigns a different identifier to the same user, due to the
|
||||||
|
same-origin policy disallowing interaction with other trackers. Because of this,
|
||||||
|
sharing data between multiple trackers is difficult, since there are no easy
|
||||||
|
ways to accurately match an accumulated profile history of one identifier to
|
||||||
|
another. This problem has been solved by modern trackers by using a mechanism
|
||||||
|
called Cookie Synchronization or Cookie Matching. This technique allows multiple
|
||||||
|
trackers to open an information sharing channel between each other without
|
||||||
|
necessarily having to know the website the user visits.
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=1\textwidth]{cookiesyncing}
|
||||||
|
\caption{Cookie Synchronization in practice between two trackers
|
||||||
|
\emph{cloudflare.com} and \emph{google.com}.}
|
||||||
|
\label{fig:cookie synchronization}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
An example of how Cookie Synchronization works in practice is given in
|
||||||
|
Figure~\ref{fig:cookie synchronization}. The two parties that are interested in
|
||||||
|
tracking the user are called \emph{cloudflare.com} and \emph{google.com} in this
|
||||||
|
example. The user they want to track is called \emph{browser}. \emph{Browser}
|
||||||
|
first visits \emph{website1.com} which loads JavaScript from
|
||||||
|
\emph{cloudflare.com}. \emph{Cloudflare.com} sets a cookie in the browser with a
|
||||||
|
tracking identifier called \emph{userID = 1234}. Next, \emph{browser} visits
|
||||||
|
another website called \emph{website2.com} which loads an advertisement banner
|
||||||
|
from \emph{google.com}. \emph{Google.com} also sets a cookie with the tracking
|
||||||
|
identifier \emph{userID = ABCD}. \emph{Browser} now has two cookies from two
|
||||||
|
different providers, each of them knowing the user under a different identifier.
|
||||||
|
When \emph{browser} visits a third website called \emph{website3.com} which
|
||||||
|
makes a request to \emph{cloudflare.com}, which recognizes the user with the
|
||||||
|
identifier \emph{userID = 1234}, \emph{cloudflare.com} sends an \gls{HTTP}
|
||||||
|
redirect, redirecting \emph{browser} to \emph{google.com}. The redirect also
|
||||||
|
contains an \gls{HTTP} Query String (see section~\ref{subsec:passing information
|
||||||
|
in urls}) which adds a query like \emph{?userID=1234\&publisher=website3.com}.
|
||||||
|
The complete GET request to \emph{google.com} might look like this:
|
||||||
|
|
||||||
|
\begin{minted}[frame=lines,framesep=2mm,bgcolor=light-gray,baselinestretch=1.2,fontsize=\scriptsize,linenos]{http}
|
||||||
|
GET /index.html?userID=1234&publisher=website3.com HTTP/1.1
|
||||||
|
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0
|
||||||
|
Host: google.com
|
||||||
|
Cookie: userID=ABCD
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
\emph{Google.com} therefore not only knows that the user with the identifier
|
||||||
|
\emph{userID=ABCD} visited \emph{website3.com} but also that \emph{browser} is
|
||||||
|
the same user as \emph{userID=1234}. Since the identifiers can now be traced
|
||||||
|
back to the same person, the different cookies have been synchronized, allowing
|
||||||
|
the two trackers to exchange information about the user without him or her
|
||||||
|
knowing.
|
||||||
|
|
||||||
|
Cookie Synchronization has seen widespread adoption especially in \gls{RTB}
|
||||||
|
based auctions \cite{olejnikSellingPrivacyAuction2014}.
|
||||||
|
\citeauthor{papadopoulosCookieSynchronizationEverything2019}
|
||||||
|
\cite{papadopoulosCookieSynchronizationEverything2019} recorded and analyzed the
|
||||||
|
browsing habits of 850 users over a time period of one year and found that 97\%
|
||||||
|
of users with regular browsing activity were exposed to Cookie Synchronization
|
||||||
|
at least once. Furthermore, they found that ``[...] the average user receives
|
||||||
|
around 1 synchronization per 68 requests''
|
||||||
|
\cite[p.~7]{papadopoulosCookieSynchronizationEverything2019}. In
|
||||||
|
\cite{englehardtOnlineTracking1MillionSite2016} the authors crawl the top
|
||||||
|
100,000 sites and find that 45 of the top 50 (90\%) third parties and 460 of the
|
||||||
|
top 1000 (46\%) use Cookie Synchronization with at least one other party.
|
||||||
|
\emph{Doubleclick.net} is at the top, sharing 108 cookies with 118 other third
|
||||||
|
parties. \citeauthor{papadopoulosExclusiveHowSynced2018} show in
|
||||||
|
\cite{papadopoulosExclusiveHowSynced2018} the threat that Cookie Synchronization
|
||||||
|
poses to encrypted \gls{TLS} sessions by performing the cookie-syncing over
|
||||||
|
unencrypted \gls{HTTP} even though the original request to the website was
|
||||||
|
encrypted. This highlights the serious privacy implications for users of
|
||||||
|
\gls{VPN} services trying to safeguard their traffic from a potentially
|
||||||
|
malicious \gls{ISP}.
|
||||||
|
|
||||||
|
\subsection{Silverlight Isolated Storage}
|
||||||
|
\label{subsec:silverlight isolated storage}
|
||||||
|
|
||||||
|
Silverlight Isolated Storage can also be used for storing data for tracking
|
||||||
|
purposes on the user's computer. It has been compared to Adobe's Flash
|
||||||
|
technology as it too requires a plugin from Microsoft to function. Available for
|
||||||
|
storage are 100 kilobytes which is the same amount Flash cookies can store.
|
||||||
|
Silverlight does not work in the private browsing mode and can only be cleaned
|
||||||
|
manually by deleting a hidden directory in the filesystem or by changing
|
||||||
|
settings in the Silverlight application. Silverlight's Isolated Storage is one
|
||||||
|
of the methods evercookie (section~\ref{subsec:evercookie}) uses to make
|
||||||
|
permanent deletion of cookies hard to do and to facilitate cookie respawning.
|
||||||
|
Usage of Silverlight has seen a steady decline since 2011 even though it has
|
||||||
|
been used by popular video streaming websites such as Netflix
|
||||||
|
\cite{NetflixBeginsRollOut2010} and Amazon. Microsoft did not include
|
||||||
|
Silverlight support in Windows 8 and declared end-of-life in a blog post for
|
||||||
|
October of 2021 \cite{SilverlightEndSupport2015}. Usage of Silverlight currently
|
||||||
|
hovers around 0.04\% for the top 10 million websites
|
||||||
|
\cite{w3techsUsageStatisticsSilverlight2020}.
|
||||||
|
|
||||||
|
\subsection{HTML5 Web Storage}
|
||||||
|
\label{subsec:html5 web storage}
|
||||||
|
|
||||||
|
HTML5 Web Storage comes in three different forms: HTML5 Global Storage, HTML5
|
||||||
|
Local Storage and HTML5 Session Storage. It is part of the HTML specification
|
||||||
|
\cite{whatwgHTMLStandard2020} and provides means for storing name-value pairs on
|
||||||
|
the user's computer. HTML5 Web Storage works similarly to cookies but enables
|
||||||
|
developers to manage transactions that are done by the user simultaneously but
|
||||||
|
in two different windows. Whereas with cookies the transaction can accidentally
|
||||||
|
be recorded twice, HTML5 Web Storage allows multiple windows to access the same
|
||||||
|
storage on the user's computer thereby avoiding this problem. In contrast to
|
||||||
|
cookies, which are sent to the server every time a request is made, HTML5 Storage
|
||||||
|
contents do not get sent to the web server. By default the storage limit is
|
||||||
|
configured to be 5 megabytes per origin \cite{whatwgHTMLStandard2020a}. Even
|
||||||
|
though this was only a recommendation by the standard, all modern browsers
|
||||||
|
adhere to it. More space can be allocated upon asking the user for permission to
|
||||||
|
do so.
|
||||||
|
|
||||||
|
Global Storage was part of an initial HTML5 draft and is accessible across
|
||||||
|
applications. Due to it violating the same-origin policy, most major browsers
|
||||||
|
have not implemented Global Storage.
|
||||||
|
|
||||||
|
Local Storage does, however, obey the same-origin policy by only allowing the
|
||||||
|
originating domain access to its name-value pairs. Every website has its own
|
||||||
|
separate storage area which maintains a clear separation of concerns.
|
||||||
|
|
||||||
|
\subsection{HTML5 Indexed Database API}
|
||||||
|
\label{subsec:html5 indexed database api}
|
||||||
|
|
||||||
|
\subsection{Web SQL Database}
|
||||||
|
\label{subsec:web sql database}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Cache-based Tracking Methods}
|
||||||
|
\label{sec:cache-based tracking methods}
|
||||||
|
|
||||||
|
\subsection{DNS Cache}
|
||||||
|
\label{subsec:dns cache}
|
||||||
|
|
||||||
|
\subsection{Browser Cache}
|
||||||
|
\label{subsec:browser cache}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
16
code/hidden-web-form.html
Normal file
16
code/hidden-web-form.html
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<form>
|
||||||
|
<div>
|
||||||
|
<label for="title">Post title:</label>
|
||||||
|
<input type="text" id="title" name="title" value="blog post">
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="content">Post content:</label>
|
||||||
|
<textarea id="content" name="content" cols="60" rows="5">
|
||||||
|
Welcome to my blog post!
|
||||||
|
</textarea>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<button type="submit">Update post</button>
|
||||||
|
</div>
|
||||||
|
<input type="hidden" id="userId" name="userId" value="5239asbd923fade923da">
|
||||||
|
</form>
|
||||||
5
code/permanent-cookie-header
Normal file
5
code/permanent-cookie-header
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
HTTP/2.0 200 OK
|
||||||
|
Content-type: text/html
|
||||||
|
Set-Cookie: cookie1=value1; Expires=Thu, 27 Feb 2020 08:56:00 GMT
|
||||||
|
|
||||||
|
[page content]
|
||||||
5
code/session-cookie-header
Normal file
5
code/session-cookie-header
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
HTTP/2.0 200 OK
|
||||||
|
Content-type: text/html
|
||||||
|
Set-Cookie: cookie1=value1
|
||||||
|
|
||||||
|
[page content]
|
||||||
BIN
figures/cookiesyncing.pdf
Normal file
BIN
figures/cookiesyncing.pdf
Normal file
Binary file not shown.
126
main.tex
Normal file
126
main.tex
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
\documentclass[a4paper,12pt,oneside]{scrreport}
|
||||||
|
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage[scaled]{helvet}
|
||||||
|
\usepackage{times}
|
||||||
|
\usepackage{subfiles}
|
||||||
|
\usepackage[english]{babel}
|
||||||
|
\usepackage[includeheadfoot,left=3.4cm,right=2.4cm,bottom=1.5cm,top=1.7cm]{geometry}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{microtype}
|
||||||
|
\usepackage{setspace}
|
||||||
|
\usepackage{fancyhdr}
|
||||||
|
\usepackage[hidelinks]{hyperref}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{minted}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{csquotes}
|
||||||
|
\usepackage{xr}
|
||||||
|
\usepackage[acronym]{glossaries}
|
||||||
|
\usepackage{lastpage}
|
||||||
|
|
||||||
|
\glsenablehyper
|
||||||
|
|
||||||
|
\setlength{\marginparwidth}{2cm}
|
||||||
|
\setlength{\parindent}{0pt}
|
||||||
|
\setlength{\parskip}{0.5em}
|
||||||
|
|
||||||
|
\usepackage{todonotes}
|
||||||
|
|
||||||
|
\fancypagestyle{frontmatter}{%
|
||||||
|
\fancyhead{}
|
||||||
|
\fancyfoot{}
|
||||||
|
\fancyfoot[C]{\thepage}
|
||||||
|
\renewcommand{\headrulewidth}{0pt}
|
||||||
|
\renewcommand{\footrulewidth}{0pt}
|
||||||
|
}
|
||||||
|
|
||||||
|
\definecolor{light-gray}{gray}{0.95}
|
||||||
|
|
||||||
|
\RedeclareSectionCommand[beforeskip=0.5cm,afterskip=1.5cm]{chapter}
|
||||||
|
\addtokomafont{chapter}{\normalfont\sffamily\huge}
|
||||||
|
\addtokomafont{section}{\normalfont\sffamily\Large}
|
||||||
|
\addtokomafont{subsection}{\normalfont\sffamily\large}
|
||||||
|
|
||||||
|
\usepackage[backend=biber,style=ieee,urldate=iso,date=iso,seconds=true]{biblatex}
|
||||||
|
|
||||||
|
\addbibresource{bibliography/references.bib}
|
||||||
|
|
||||||
|
\hypersetup{
|
||||||
|
linkcolor=black,
|
||||||
|
urlcolor=black,
|
||||||
|
citecolor=black,
|
||||||
|
breaklinks=true,
|
||||||
|
colorlinks=true,
|
||||||
|
frenchlinks=true,
|
||||||
|
linktoc = all,
|
||||||
|
pdftitle = {Stateful Web Tracking: Techniques and Countermeasures},
|
||||||
|
pdfauthor = {Tobias Eidelpes}
|
||||||
|
}
|
||||||
|
|
||||||
|
\pagestyle{fancy}
|
||||||
|
|
||||||
|
\renewcommand{\chaptermark}[1]{\markboth{\chaptername\ \thechapter.\ #1}{}}
|
||||||
|
\renewcommand{\sectionmark}[1]{\markright{\arabic{chapter}.\arabic{section}.\ #1}}
|
||||||
|
\renewcommand {\headrulewidth}{0.4pt} % draw a 0.4pt rule below the page header
|
||||||
|
\renewcommand {\footrulewidth}{0.4pt} % draw a 0.4pt rule above the page footer
|
||||||
|
|
||||||
|
\fancyhead{}
|
||||||
|
|
||||||
|
\fancyhead[L]{\leftmark}
|
||||||
|
\fancyhead[R]{\rightmark}
|
||||||
|
|
||||||
|
\fancyfoot{}
|
||||||
|
\fancyfoot[L]{Stateful Web Tracking: Techniques and Countermeasures}
|
||||||
|
\fancyfoot[R]{\thepage \ / \pageref{LastPage}}
|
||||||
|
|
||||||
|
\fancypagestyle{plain}{}
|
||||||
|
|
||||||
|
\graphicspath{{figures/}{../figures/}}
|
||||||
|
|
||||||
|
\setstretch{1.1}
|
||||||
|
|
||||||
|
\makeglossaries
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\input{abbrev/acronym.tex}
|
||||||
|
|
||||||
|
\pagenumbering{roman}
|
||||||
|
|
||||||
|
\subfile{chapters/abstract-de}
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\subfile{chapters/abstract-en}
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\tableofcontents
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\listoffigures
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\listoflistings
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\printglossaries
|
||||||
|
\thispagestyle{frontmatter}
|
||||||
|
|
||||||
|
\subfile{chapters/introduction}
|
||||||
|
|
||||||
|
\pagenumbering{arabic}
|
||||||
|
|
||||||
|
\subfile{chapters/methods}
|
||||||
|
|
||||||
|
\subfile{chapters/defences}
|
||||||
|
|
||||||
|
\subfile{chapters/implications}
|
||||||
|
|
||||||
|
\subfile{chapters/developments}
|
||||||
|
|
||||||
|
\subfile{chapters/conclusion}
|
||||||
|
|
||||||
|
\printbibliography
|
||||||
|
|
||||||
|
\end{document}
|
||||||
Loading…
x
Reference in New Issue
Block a user