diff --git a/bibliography/references.bib b/bibliography/references.bib index 0c633c6..bdd8bbe 100644 --- a/bibliography/references.bib +++ b/bibliography/references.bib @@ -1143,6 +1143,16 @@ langid = {english} } +@online{soudersAnnouncingHTTPArchive2011, + title = {Announcing the {{HTTP Archive}} | {{High Performance Web Sites}}}, + author = {Souders, Steve}, + date = {2011-03-30}, + journaltitle = {stevesouders.com}, + url = {https://www.stevesouders.com/blog/2011/03/30/announcing-the-http-archive/}, + urldate = {2020-03-22}, + type = {blog} +} + @article{starovAreYouSure2016, title = {Are {{You Sure You Want}} to {{Contact Us}}? {{Quantifying}} the {{Leakage}} of {{PII}} via {{Website Contact Forms}}}, shorttitle = {Are {{You Sure You Want}} to {{Contact Us}}?}, diff --git a/chapters/methods.tex b/chapters/methods.tex index d170a43..cd0059c 100644 --- a/chapters/methods.tex +++ b/chapters/methods.tex @@ -548,7 +548,11 @@ used: by storing unique identifiers which are read on subsequent visits. \citeauthor{ayensonFlashCookiesPrivacy2011} \cite{ayensonFlashCookiesPrivacy2011} found that 17 of the top 100 web sites used HTML5 Web Storage with some of them using it for cookie respawing (see -section~\ref{subsec:evercookie}). +section~\ref{subsec:evercookie}). A recent survey by +\citeauthor{belloroKnowWhatYou2018} \cite{belloroKnowWhatYou2018} looked at Web +Storage usage in general and found that 83.09\% of the top 10K Alexa web sites +use it. The authors flagged 63.88\% of those usages as coming from known +tracking domains. \subsection{HTML5 Indexed Database API} \label{subsec:html5 indexed database api} @@ -580,7 +584,12 @@ section~\ref{subsec:evercookie}) by \gls{HTTP} cookies. \citeauthor{acarWebNeverForgets2014} \cite{acarWebNeverForgets2014} have shown that only 20 of 100.000 surveyed sites use the IndexedDB storage vector with one of them (\texttt{weibo.com}) using it -for respawning \gls{HTTP} cookies. +for respawning \gls{HTTP} cookies. 
A more recent study by +\citeauthor{belloroKnowWhatYou2018} \cite{belloroKnowWhatYou2018} paints a +different picture: On a dataset provided by the \gls{HTTP} Archive project +\cite{soudersAnnouncingHTTPArchive2011}, they found that 5.56\% of observed +sites use IndexedDB. Of those that use IndexedDB, 31.87\% of usages appear to be +coming from domains that are flagged as `trackers'. \subsection{Web SQL Database} \label{subsec:web sql database} @@ -592,6 +601,8 @@ the standard in 2010 due to a lack of other backend implementations (other than SQLite) which is necessary for a recommendation as a standard. Browsers have turned to HTML5 IndexedDB (see section~\ref{subsec:html5 indexed database api}), the ``spiritual successor'' to Web SQL Database, for web database storage. +Despite the W3C deprecating Web SQL Database, some browsers such as Chrome, +Safari and Opera still support it and have no plans to discontinue it. In the same way that other tracking technologies can maintain a history of web site visits and actions, Web SQL Database can store identifying information via @@ -604,11 +615,30 @@ Due to the W3C abandoning the Web SQL Database standard, not many reports on usage for tracking purposes exist. The method has been added, however, to the evercookie library by \citeauthor{kamkarEvercookieVirtuallyIrrevocable2010} (see section~\ref{subsec:evercookie}) to add another layer of redundancy for storing -unique identifiers and respawning deleted ones. +unique identifiers and respawning deleted ones. By performing static analysis on +a dataset provided by the \gls{HTTP} Archive project +\cite{soudersAnnouncingHTTPArchive2011}, \citeauthor{belloroKnowWhatYou2018} \cite{belloroKnowWhatYou2018} +found that 1.34\% of the surveyed websites use Web SQL Database in one of their +subresources. Of all Web SQL Database usages, 53.59\% are considered to be coming from +known tracking domains. 
The share of tracking-related usage is lower for the top 10K web sites as +determined by Alexa (in May 2018): 2.12\% use Web SQL Database and 39.9\% of +those use it for tracking. These percentages show that Web SQL Database is not +used as a means to provide new functionality in most cases, but to increase user +tracking capabilities. \section{Cache-based Tracking Methods} \label{sec:cache-based tracking methods} +While the underlying principle of storing unique identifiers on the user agent's +computer remains the same, cache-based methods exploit a type of storage that is +normally used for data that is saved for short periods of time and most commonly +serves to improve performance. Whereas storage-based tracking methods (see +section~\ref{sec:storage-based tracking methods}) exploit storage interfaces +that are meant for persisting data to disk, caches store data that has been +generated by an operation and can be served faster on subsequent requests. + +This section is divided into subsections, each covering one cache-based tracking method. + \subsection{DNS Cache} \label{subsec:dns cache}