Add text for web cache tracking

2020-03-25 10:55:01 +01:00 · 2020-03-25 10:55:01 +01:00 · 29219c30c9
commit 29219c30c9
parent 4cb4c2c032
2 changed files with 129 additions and 4 deletions
--- a/bibliography/references.bib
+++ b/bibliography/references.bib
@ -107,6 +107,15 @@
  type = {SSRN Scholarly Paper}
 }
@online{baronPreventingAttacksUser2010,
  title = {Preventing Attacks on a User's History through {{CSS}} {{:visited}} Selectors},
  author = {Baron, David},
  date = {2010-03-09},
  journaltitle = {dbaron.org},
  url = {https://dbaron.org/mozilla/visited-privacy},
  urldate = {2020-03-25}
 }
@report{barthHTTPStateManagement2011,
  title = {{{HTTP State Management Mechanism}}},
  author = {Barth, A.},
@ -388,6 +397,17 @@
  series = {Lecture {{Notes}} in {{Computer Science}}}
 }
@inproceedings{feltenTimingAttacksWeb2000,
  author    = {Felten, Edward W. and Schneider, Michael A.},
  title     = {Timing Attacks on {{Web}} Privacy},
  booktitle = {Proceedings of the 7th {{ACM}} Conference on {{Computer}} and {{Communications Security}}},
  series    = {{{CCS}} '00},
  publisher = {{Association for Computing Machinery}},
  date      = {2000-11-01},
  pages     = {25--32},
  doi       = {10.1145/352600.352606}
 }
@article{frankenExposingCookiePolicy2019,
  title = {Exposing {{Cookie Policy Flaws Through}} an {{Extensive Evaluation}} of {{Browsers}} and {{Their Extensions}}},
  author = {Franken, Gertjan and Van Goethem, Tom and Joosen, Wouter},
@ -565,6 +585,32 @@
  series = {{{IMC}} '17}
 }
@inproceedings{jacksonProtectingBrowserState2006,
  title = {Protecting Browser State from Web Privacy Attacks},
  booktitle = {Proceedings of the 15th International Conference on {{World Wide Web}}},
  author = {Jackson, Collin and Bortz, Andrew and Boneh, Dan and Mitchell, John C.},
  date = {2006-05-23},
  pages = {737--744},
  publisher = {{Association for Computing Machinery}},
  doi = {10.1145/1135777.1135884},
  abstract = {Through a variety of means, including a range of browser cache methods and inspecting the color of a visited hyperlink, client-side browser state can be exploited to track users against their wishes. This tracking is possible because persistent, client-side browser state is not properly partitioned on per-site basis in current browsers. We address this problem by refining the general notion of a "same-origin" policy and implementing two browser extensions that enforce this policy on the browser cache and visited links. We also analyze various degrees of cooperation between sites to track users, and show that even if long-term browser state is properly partitioned, it is still possible for sites to use modern web features to bounce users between sites and invisibly engage in cross-domain tracking of their visitors. Cooperative privacy attacks are an unavoidable consequence of all persistent browser state that affects the behavior of the browser, and disabling or frequently expiring this state is the only way to achieve true privacy against colluding parties.},
  series = {{{WWW}} '06}
 }
@inproceedings{jancWebBrowserHistory2010,
  title = {Web {{Browser History Detection}} as a {{Real}}-{{World Privacy Threat}}},
  booktitle = {Computer {{Security}} \textendash{} {{ESORICS}} 2010},
  author = {Janc, Artur and Olejnik, Lukasz},
  editor = {Gritzalis, Dimitris and Preneel, Bart and Theoharidou, Marianthi},
  date = {2010},
  pages = {215--231},
  publisher = {{Springer}},
  doi = {10.1007/978-3-642-15497-3_14},
  abstract = {Web browser history detection using CSS visited styles has long been dismissed as an issue of marginal impact. However, due to recent changes in Web usage patterns, coupled with browser performance improvements, the long-standing issue has now become a significant threat to the privacy of Internet users. In this paper we analyze the impact of CSS-based history detection and demonstrate the feasibility of conducting practical attacks with minimal resources. We analyze Web browser behavior and detectability of content loaded via standard protocols and with various HTTP response codes. We develop an algorithm for efficient examination of large link sets and evaluate its performance in modern browsers. Compared to existing methods our approach is up to 6 times faster, and is able to detect up to 30,000 visited links per second. We present a novel Web application capable of effectively detecting clients' browsing histories and discuss real-world results obtained from 271,576 Internet users. Our results indicate that at least 76\% of Internet users are vulnerable to history detection, including over 94\% of Google Chrome users; for a test of most popular Internet websites we were able to detect, on average, 62.6 (median 22) visited locations per client. We also demonstrate the potential to profile users based on social news stories they visited, and to detect private data such as zipcodes or search queries typed into online forms.},
  langid = {english},
  series = {Lecture {{Notes}} in {{Computer Science}}}
 }
@article{johansenMakingGDPRUsable2019,
  title = {Making {{GDPR Usable}}: {{A Model}} to {{Support Usability Evaluations}} of {{Privacy}}},
  shorttitle = {Making {{GDPR Usable}}},
@ -1228,6 +1274,19 @@
  langid = {english}
 }
@inproceedings{vangoethemClockStillTicking2015,
  author     = {Van Goethem, Tom and Joosen, Wouter and Nikiforakis, Nick},
  title      = {The {{Clock}} Is {{Still Ticking}}: {{Timing Attacks}} in the {{Modern Web}}},
  shorttitle = {The {{Clock}} Is {{Still Ticking}}},
  booktitle  = {Proceedings of the 22nd {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}},
  series     = {{{CCS}} '15},
  publisher  = {{Association for Computing Machinery}},
  date       = {2015-10-12},
  pages      = {1382--1393},
  doi        = {10.1145/2810103.2813632},
  abstract   = {Web-based timing attacks have been known for over a decade, and it has been shown that, under optimal network conditions, an adversary can use such an attack to obtain information on the state of a user in a cross-origin website. In recent years, desktop computers have given way to laptops and mobile devices, which are mostly connected over a wireless or mobile network. These connections often do not meet the optimal conditions that are required to reliably perform cross-site timing attacks. In this paper, we show that modern browsers expose new side-channels that can be used to acquire accurate timing measurements, regardless of network conditions. Using several real-world examples, we introduce four novel web-based timing attacks against modern browsers and describe how an attacker can use them to obtain personal information based on a user's state on a cross-origin website. We evaluate our proposed attacks and demonstrate that they significantly outperform current attacks in terms of speed, reliability, and accuracy. Furthermore, we show that the nature of our attacks renders traditional defenses, i.e., those based on randomly delaying responses, moot and discuss possible server-side defense mechanisms.}
 }
@article{venkatadriInvestigatingSourcesPII2019,
  title = {Investigating Sources of {{PII}} Used in {{Facebook}}'s Targeted Advertising},
  author = {Venkatadri, Giridhari and Lucherini, Elena and Sapiezynski, Piotr and Mislove, Alan},
@ -1332,6 +1391,17 @@ The goal of the DOM specification is to define a programmatic interface for XML
  urldate = {2020-02-27}
 }
@inproceedings{wondracekPracticalAttackDeanonymize2010,
  author     = {Wondracek, Gilbert and Holz, Thorsten and Kirda, Engin and Kruegel, Christopher},
  title      = {A {{Practical Attack}} to {{De}}-Anonymize {{Social Network Users}}},
  booktitle  = {2010 {{IEEE Symposium}} on {{Security}} and {{Privacy}}},
  eventtitle = {2010 {{IEEE Symposium}} on {{Security}} and {{Privacy}}},
  date       = {2010-05},
  pages      = {223--238},
  doi        = {10.1109/SP.2010.21},
  abstract   = {Social networking sites such as Facebook, LinkedIn, and Xing have been reporting exponential growth rates and have millions of registered users. In this paper, we introduce a novel de-anonymization attack that exploits group membership information that is available on social networking sites. More precisely, we show that information about the group memberships of a user (i.e., the groups of a social network to which a user belongs) is sufficient to uniquely identify this person, or, at least, to significantly reduce the set of possible candidates. That is, rather than tracking a user's browser as with cookies, it is possible to track a person. To determine the group membership of a user, we leverage well-known web browser history stealing attacks. Thus, whenever a social network user visits a malicious website, this website can launch our de-anonymization attack and learn the identity of its visitors. The implications of our attack are manifold, since it requires a low effort and has the potential to affect millions of social networking users. We perform both a theoretical analysis and empirical measurements to demonstrate the feasibility of our attack against Xing, a medium-sized social network with more than eight million members that is mainly used for business relationships. Furthermore, we explored other, larger social networks and performed experiments that suggest that users of Facebook and LinkedIn are equally vulnerable.}
 }
@online{youtubeengineeringYouTubeNowDefaults2015,
  title = {{{YouTube}} Now Defaults to {{HTML5}} {$<$}video{$>$}},
  author = {YouTube Engineering},
--- a/chapters/methods.tex
+++ b/chapters/methods.tex
@ -637,12 +637,67 @@ section~\ref{sec:storage-based tracking methods}) exploit storage interfaces
 that are meant for persisting data to disk, caches store data that has been
 generated by an operation and can be served faster on subsequent requests.
-This section is divided into 
+A variety of caches exist and they are utilized for different purposes, leading
 to different forms of information exploitability for tracking users. This
 section introduces methods which are in most cases not prevalent but are more
 sophisticated and can thus be much harder to circumvent or block.
 \todo{Insert structure}
 \subsection{Web Cache}
 \label{subsec:web cache}
 Using the \gls{DOM} \gls{API}'s \texttt{Window.getComputedStyle()} method,
 websites were able to check a user's browsing history by utilizing the \gls{CSS}
 \texttt{:visited} selector. Links can be coloured depending on whether they have
 already been visited or not. The colours can be set by the website trying to
 find out what the user's browsing history is. JavaScript would then be used to
 generate links on the fly for websites that will be cross-checked with the
 contents of the browsing history. After generating links, a script can check the
 colour, compare it with the colour that has been set for visited and non-visited
 websites and see if a website has already been visited or not.
 A solution to the problem has been proposed and subsequently implemented by
 \citeauthor{baronPreventingAttacksUser2010}
 \cite{baronPreventingAttacksUser2010} in 2010, making
 \texttt{getComputedStyle()} and similar functions lie about the state of the
 visited links and marking them as unvisited. Another solution has been developed
 by \citeauthor{jacksonProtectingBrowserState2006}
 \cite{jacksonProtectingBrowserState2006} in the form of a browser extension that
 enforces the same-origin policy for browser histories as well. Although their
 approach limits access to a user's browsing history by third parties, first
 parties are unencumbered by the same-origin policy. Their browser extension
 does, however, thwart the attack carried out by
 \citeauthor{jancWebBrowserHistory2010} in \cite{jancWebBrowserHistory2010} where
 the authors were able to check for up to 30,000 links per second.
 \citeauthor{wondracekPracticalAttackDeanonymize2010}
 \cite{wondracekPracticalAttackDeanonymize2010} demonstrate the severity of
 history stealing attacks (e.g., visited link differentiation) on user privacy by
 probing for \glspl{URL} that encode user information such as group membership in
 social networks. By constructing a set of group memberships for each user, the
 results can uniquely identify a person. Furthermore, information that is not yet
 attributed to a single user but to a group as a whole can be used to more
 accurately identify members of said group.
 Other ways of utilizing a web browser's cache to track users include checking
 whether a website asset (e.g., an image or script) has already been cached by
 the user agent or not. If it has been cached, the website knows that it has been
 visited before and if it has not been cached (the asset is downloaded from the
 server), the user agent visits for the first time. Another way is to embed
 identifiers in cached documents. An \gls{HTML} file can contain an identifier
 which is stored in a \texttt{<div>} tag and is cached by the user agent. The
 identifier can then be read from the cache on subsequent visits, even from
 third-party websites.
 \subsection{Cache Timing}
 \label{subsec:cache timing}
 \subsection{Cache Control Directives}
 \label{subsec:cache control directives}
 \subsection{DNS Cache}
 \label{subsec:dns cache}
 \subsection{Browser Cache}
 \label{subsec:browser cache}
 \end{document}