diff --git a/abbrev/acronym.tex b/abbrev/acronym.tex index 0f0f25f..39d5776 100644 --- a/abbrev/acronym.tex +++ b/abbrev/acronym.tex @@ -1,23 +1,24 @@ -\newacronym {INSO} {INSO} {Industrial Software} -\newacronym {TU} {TU} {Technische Universit\"at Wien} -\newacronym {ZID} {ZID} {Zentraler Informatikdienst} -\newacronym {KISS} {KISS} {Keep It Sober and Significant} -\newacronym {URL} {URL} {Uniform Resource Locator} -\newacronym {URI} {URI} {Uniform Resource Identifier} -\newacronym {WWW} {WWW} {World Wide Web} -\newacronym {HTTP} {HTTP} {Hypertext Transfer Protocol} -\newacronym {HTML} {HTML} {Hypertext Markup Language} -\newacronym {DOM} {DOM} {Document Object Model} -\newacronym {API} {API} {Application Programming Interface} -\newacronym {XHTML} {XHTML} {Extensible Hypertext Markup Language} -\newacronym {XML} {XML} {Extensible Markup Language} -\newacronym {W3C} {W3C} {World Wide Web Consortium} -\newacronym {WHATWG} {WHATWG} {Web Hypertext Application Technology Working Group} -\newacronym {JSON} {JSON} {JavaScript Object Notation} -\newacronym {PII} {PII} {Personally Identifiable Information} -\newacronym {LSO} {LSO} {Local Shared Object} -\newacronym {CSS} {CSS} {Cascading Style Sheets} -\newacronym {RTB} {RTB} {Real Time Bidding} -\newacronym {TLS} {TLS} {Transport Layer Security} -\newacronym {VPN} {VPN} {Virtual Private Network} -\newacronym {ISP} {ISP} {Internet Service Provider} +\newacronym {INSO} {INSO} {Industrial Software} +\newacronym {TU} {TU} {Technische Universit\"at Wien} +\newacronym {ZID} {ZID} {Zentraler Informatikdienst} +\newacronym {KISS} {KISS} {Keep It Sober and Significant} +\newacronym {URL} {URL} {Uniform Resource Locator} +\newacronym {URI} {URI} {Uniform Resource Identifier} +\newacronym {WWW} {WWW} {World Wide Web} +\newacronym {HTTP} {HTTP} {Hypertext Transfer Protocol} +\newacronym {HTML} {HTML} {Hypertext Markup Language} +\newacronym {DOM} {DOM} {Document Object Model} +\newacronym {API} {API} {Application Programming 
Interface} +\newacronym {XHTML} {XHTML} {Extensible Hypertext Markup Language} +\newacronym {XML} {XML} {Extensible Markup Language} +\newacronym {W3C} {W3C} {World Wide Web Consortium} +\newacronym {WHATWG} {WHATWG} {Web Hypertext Application Technology Working Group} +\newacronym {JSON} {JSON} {JavaScript Object Notation} +\newacronym {PII} {PII} {Personally Identifiable Information} +\newacronym {LSO} {LSO} {Local Shared Object} +\newacronym {CSS} {CSS} {Cascading Style Sheets} +\newacronym {RTB} {RTB} {Real Time Bidding} +\newacronym {TLS} {TLS} {Transport Layer Security} +\newacronym {VPN} {VPN} {Virtual Private Network} +\newacronym {ISP} {ISP} {Internet Service Provider} +\newacronym {SQL} {SQL} {Structured Query Language} diff --git a/bibliography/references.bib b/bibliography/references.bib index 026c04e..0c633c6 100644 --- a/bibliography/references.bib +++ b/bibliography/references.bib @@ -23,6 +23,27 @@ langid = {american} } +@inproceedings{akkusNontrackingWebAnalytics2012, + title = {Non-Tracking Web Analytics}, + booktitle = {Proceedings of the 2012 {{ACM}} Conference on {{Computer}} and Communications Security}, + author = {Akkus, Istemi Ekin and Chen, Ruichuan and Hardt, Michaela and Francis, Paul and Gehrke, Johannes}, + date = {2012-10-16}, + pages = {687--698}, + publisher = {{Association for Computing Machinery}}, + doi = {10.1145/2382196.2382268}, + abstract = {Today, websites commonly use third party web analytics services to obtain aggregate information about users that visit their sites. This information includes demographics and visits to other sites as well as user behavior within their own sites. Unfortunately, to obtain this aggregate information, web analytics services track individual user browsing behavior across the web. This violation of user privacy has been strongly criticized, resulting in tools that block such tracking as well as anti-tracking legislation and standards such as Do-Not-Track. 
These efforts, while improving user privacy, degrade the quality of web analytics. This paper presents the first design of a system that provides web analytics without tracking. The system gives users differential privacy guarantees, can provide better quality analytics than current services, requires no new organizational players, and is practical to deploy. This paper describes and analyzes the design, gives performance benchmarks, and presents our implementation and deployment across several hundred users.}, + series = {{{CCS}} '12} +} + +@misc{alabbasIndexedDatabaseAPI2020, + title = {Indexed {{Database API}} 3.0}, + author = {Alabbas, Ali and Bell, Joshua}, + date = {2020-03-20}, + publisher = {{W3C}}, + url = {https://w3c.github.io/IndexedDB/}, + urldate = {2020-03-20} +} + @article{aonghusaDontLetGoogle2016, title = {Dont {{Let Google Know Im Lonely}}}, author = {Aonghusa, P\'ol Mac and Leith, Douglas J.}, @@ -157,6 +178,19 @@ eprintclass = {cs} } +@article{belloroKnowWhatYou2018, + title = {I {{Know What You Did Last Summer}}: {{New Persistent Tracking Mechanisms}} in the {{Wild}}}, + shorttitle = {I {{Know What You Did Last Summer}}}, + author = {Belloro, Stefano and Mylonas, Alexios}, + date = {2018}, + journaltitle = {IEEE Access}, + volume = {6}, + pages = {52779--52792}, + doi = {10.1109/ACCESS.2018.2869251}, + abstract = {As the usage of the Web increases, so do the threats an everyday user faces. One of the most pervasive threats a Web user faces is tracking, which enables an entity to gain unauthorized access to the user's personal data. Through the years, many client storage technologies, such as cookies, have been used for this purpose and have been extensively studied in the literature. The focus of this paper is on three newer client storage mechanisms, namely, Web Storage, Web SQL Database, and Indexed Database API. Initially, a large-scale analysis of their usage on the Web is conducted to appraise their usage in the wild. 
Then, this paper examines the extent that they are used for tracking purposes. The results suggest that Web Storage is the most used among the three technologies. More importantly, to the best of our knowledge, this paper is the first to suggest Web tracking as the main use case of these technologies. Motivated by these results, this paper examines whether popular desktop and mobile browsers protect their users from tracking mechanisms that use Web Storage, Web SQL Database, and Indexed Database. Our results uncover many cases where the relevant security controls are ineffective, thus making it virtually impossible for certain users to avoid tracking.}, + note = {Conference Name: IEEE Access} +} + @report{berners-leeUniformResourceLocators1994, title = {Uniform {{Resource Locators}} ({{URL}})}, author = {Berners-Lee, Timothy and Masinter, Larry and McCahill, Mark}, @@ -438,6 +472,15 @@ number = {2} } +@misc{hicksonWebSQLDatabase2010, + title = {Web {{SQL Database}}}, + author = {Hickson, Ian and {Google Inc.}}, + date = {2010-11-18}, + publisher = {{W3C}}, + url = {https://www.w3.org/TR/webdatabase/}, + urldate = {2020-03-20} +} + @article{huCharacterisingThirdParty2019, title = {Characterising {{Third Party Cookie Usage}} in the {{EU}} after {{GDPR}}}, author = {Hu, Xuehui and Sastry, Nishanth}, @@ -1240,18 +1283,6 @@ The goal of the DOM specification is to define a programmatic interface for XML series = {{{ANRW}} '18} } -@article{westAnalysisPrivacySecurity2012, - title = {Analysis of Privacy and Security in {{HTML5}} Web Storage}, - author = {West, William and Pulimood, S. Monisha}, - date = {2012-01-01}, - journaltitle = {Journal of Computing Sciences in Colleges}, - shortjournal = {J. Comput. Sci. Coll.}, - volume = {27}, - pages = {80--87}, - abstract = {There is no doubt that the web has evolved from a simple media consumption device to an extremely complex programming platform over the past couple of decades. 
With the exponential growth of Internet use, web applications are becoming increasingly popular: they are easy to distribute, simple to update, and widely accessible. However, a uniform programming method for developing web applications does not currently exist. Developers must be experts in and juggle a combination of different languages in order to create fully functional web applications. W3C's introduction of HTML5 attempts to alleviate this problem [8]. Their Web Storage specification offers a method for storing client-side data as an alternative to the use of cookies in web applications. In this paper, the Web Storage specification is analyzed through an in-depth discussion of the privacy, security, and performance of current and future web technologies. The advantages and disadvantages of the localStorage and sessionStorage attributes are discussed, with special consideration given to their impact on privacy and security. Analysis is done in the context of a custom web application, offering a suggested framework for applications utilizing HTML5 Web Storage.}, - number = {3} -} - @article{westMeasuringPrivacyDisclosures2014, title = {Measuring {{Privacy Disclosures}} in {{URL Query Strings}}}, author = {West, Andrew G. and Aviv, Adam J.}, @@ -1301,6 +1332,18 @@ The goal of the DOM specification is to define a programmatic interface for XML abstract = {Four years ago, we wrote about YouTube's early support for the HTML5 {$<$}video{$>$} tag and how it performed compared to Flash. 
At the time, there...} } +@inproceedings{yuTrackingTrackers2016, + title = {Tracking the {{Trackers}}}, + booktitle = {Proceedings of the 25th {{International Conference}} on {{World Wide Web}}}, + author = {Yu, Zhonghao and Macbeth, Sam and Modi, Konark and Pujol, Josep M.}, + date = {2016-04-11}, + pages = {121--132}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + doi = {10.1145/2872427.2883028}, + abstract = {Online tracking poses a serious privacy challenge that has drawn significant attention in both academia and industry. Existing approaches for preventing user tracking, based on curated blocklists, suffer from limited coverage and coarse-grained resolution for classification, rely on exceptions that impact sites' functionality and appearance, and require significant manual maintenance. In this paper we propose a novel approach, based on the concepts leveraged from \$k\$-Anonymity, in which users collectively identify unsafe data elements, which have the potential to identify uniquely an individual user, and remove them from requests. We deployed our system to 200,000 German users running the Cliqz Browser or the Cliqz Firefox extension to evaluate its efficiency and feasibility. Results indicate that our approach achieves better privacy protection than blocklists, as provided by Disconnect, while keeping the site breakage to a minimum, even lower than the community-optimized AdBlock Plus. We also provide evidence of the prevalence and reach of trackers to over 21 million pages of 350,000 unique sites, the largest scale empirical evaluation to date. 95\% of the pages visited contain 3rd party requests to potential trackers and 78\% attempt to transfer unsafe data. 
Tracker organizations are also ranked, showing that a single organization can reach up to 42\% of all page visits in Germany.}, + series = {{{WWW}} '16} +} + @online{zypWindowNameTransport2008, title = {Window.Name {{Transport}}}, author = {Zyp, Kris}, diff --git a/chapters/methods.tex b/chapters/methods.tex index 82bd759..d170a43 100644 --- a/chapters/methods.tex +++ b/chapters/methods.tex @@ -539,7 +539,7 @@ the full range of features in offline-mode is feasible. The third category of HTML5 Web Storage is similar to Local Storage, but requires that the stored data be deleted after a session is closed. While -content that is persisted by Local Storage, must be deleted explicitly by the +content that is persisted by Local Storage must be deleted explicitly by the user, Session Storage has the intended function of providing non-persistent storage. @@ -553,9 +553,58 @@ section~\ref{subsec:evercookie}). \subsection{HTML5 Indexed Database API} \label{subsec:html5 indexed database api} +The need for client-side storage to provide performant web applications that can +also function offline has prompted the inception of alternative methods to +store and retrieve information. Consequently, the development of the HTML5 +standard has tried to fill that need by introducing HTML5 Web Storage and the +HTML5 Indexed Database \gls{API}. + +HTML5 Indexed Database \gls{API} provides an interface for storing values and +hierarchical objects using the well-known key-value pair storage principle +\cite{alabbasIndexedDatabaseAPI2020}. This property makes it similar to NoSQL +storage solutions which have seen increasing adoption rates on the web. It is +the successor to the abandoned Web SQL Database (see section~\ref{subsec:web +sql database}) standard and functions similarly to the HTML5 Web Storage, +meaning that it has the same storage limits and privacy implications and has to +obey the same-origin policy. 
In contrast to HTML5 Web Storage, IndexedDB is +intended for storing larger amounts of data and provides additional functions +such as in-order key retrieval. Reading from and writing to an IndexedDB is done +with JavaScript by opening a connection to the database, preparing a transaction +and committing it. The development of the standard is ongoing with two editions +already published and recommended by the W3C and the third edition existing as +an editor's draft until it is ready for recommendation. + +HTML5 IndexedDB has been added to the evercookie library (see +section~\ref{subsec:evercookie}) by +\citeauthor{kamkarEvercookieVirtuallyIrrevocable2010}, providing redundancy for +\gls{HTTP} cookies. \citeauthor{acarWebNeverForgets2014} +\cite{acarWebNeverForgets2014} have shown that only 20 of 100,000 surveyed sites +use the IndexedDB storage vector with one of them (\texttt{weibo.com}) using it +for respawning \gls{HTTP} cookies. + \subsection{Web SQL Database} \label{subsec:web sql database} +Web SQL Database \cite{hicksonWebSQLDatabase2010} was initially developed to +provide an \gls{API} for storing data in databases. The stored data can then be +queried using \gls{SQL} or variants thereof. The W3C stopped the development of +the standard in 2010 due to a lack of independent backend implementations (other than +SQLite), which are necessary for a recommendation as a standard. Browsers have +turned to HTML5 IndexedDB (see section~\ref{subsec:html5 indexed database api}), +the ``spiritual successor'' to Web SQL Database, for web database storage. + +In the same way that other tracking technologies can maintain a history of web +site visits and actions, Web SQL Database can store identifying information via +the usage of unique identifiers. An arbitrary maximum size of 5 megabytes of +storage per origin is recommended by the standard, with the possibility to ask +the user for more capacity. 
This limit includes other domains which are +affiliated with the origin but have a different name (e.g.\ subdomains). + +Due to the W3C abandoning the Web SQL Database standard, not many reports on +usage for tracking purposes exist. The method has been added, however, to the +evercookie library by \citeauthor{kamkarEvercookieVirtuallyIrrevocable2010} (see +section~\ref{subsec:evercookie}) to add another layer of redundancy for storing +unique identifiers and respawning deleted ones. \section{Cache-based Tracking Methods} \label{sec:cache-based tracking methods}