@inproceedings{acarWebNeverForgets2014, title = {The {{Web Never Forgets}}: {{Persistent Tracking Mechanisms}} in the {{Wild}}}, shorttitle = {The {{Web Never Forgets}}}, booktitle = {Proceedings of the 2014 {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Acar, Gunes and Eubank, Christian and Englehardt, Steven and Juarez, Marc and Narayanan, Arvind and Diaz, Claudia}, year = {2014}, month = nov, pages = {674--689}, abstract = {We present the first large-scale studies of three advanced web tracking mechanisms - canvas fingerprinting, evercookies and use of "cookie syncing" in conjunction with evercookies. Canvas fingerprinting, a recently developed form of browser fingerprinting, has not previously been reported in the wild; our results show that over 5\% of the top 100,000 websites employ it. We then present the first automated study of evercookies and respawning and the discovery of a new evercookie vector, IndexedDB. Turning to cookie syncing, we present novel techniques for detection and analysing ID flows and we quantify the amplification of privacy-intrusive tracking practices due to cookie syncing. Our evaluation of the defensive techniques used by privacy-aware users finds that there exist subtle pitfalls --- such as failing to clear state on multiple browsers at once - in which a single lapse in judgement can shatter privacy defenses. This suggests that even sophisticated users face great difficulties in evading tracking techniques.}, series = {{{CCS}} '14} } @misc{Adblock, title = {Adblock {{Plus}}}, url = {https://adblockplus.org/en/}, urldate = {2020-07-12}, abstract = {Adblock Plus, the most popular ad blocker on Firefox, Chrome, Safari, Android and iOS. 
Block pop-ups and annoying ads on websites like Facebook and YouTube.}, note = {Accessed 2020-07-12} } @misc{adobeAdobeFlashPlatform, title = {Adobe~{{Flash~Platform}} * {{Shared}} Objects}, author = {Adobe}, url = {https://help.adobe.com/en_US/as3/dev/WS5b3ccc516d4fbf351e63e3d118a9b90204-7d80.html}, urldate = {2020-08-10}, journal = {Shared Objects}, note = {Accessed 2020-08-10}, type = {Developer's {{Guide}}} } @misc{adobecorporatecommunicationsFlashFutureInteractive2017, title = {Flash \& {{The Future}} of {{Interactive Content}}}, author = {Adobe Corporate Communications}, year = {2017}, month = jul, url = {https://theblog.adobe.com/adobe-flash-update/}, urldate = {2020-02-17}, abstract = {Adobe has long played a leadership role in advancing interactivity and creative content \textendash{} from video, to games and more \textendash{} on the web. Where we've seen a need to pus...}, journal = {Adobe Blog}, note = {Accessed 2020-08-10} } @inproceedings{akkusNontrackingWebAnalytics2012, title = {Non-Tracking Web Analytics}, booktitle = {Proceedings of the 2012 {{ACM}} Conference on {{Computer}} and Communications Security}, author = {Akkus, Istemi Ekin and Chen, Ruichuan and Hardt, Michaela and Francis, Paul and Gehrke, Johannes}, year = {2012}, month = oct, pages = {687--698}, abstract = {Today, websites commonly use third party web analytics services t obtain aggregate information about users that visit their sites. This information includes demographics and visits to other sites as well as user behavior within their own sites. Unfortunately, to obtain this aggregate information, web analytics services track individual user browsing behavior across the web. This violation of user privacy has been strongly criticized, resulting in tools that block such tracking as well as anti-tracking legislation and standards such as Do-Not-Track. These efforts, while improving user privacy, degrade the quality of web analytics. 
This paper presents the first design of a system that provides web analytics without tracking. The system gives users differential privacy guarantees, can provide better quality analytics than current services, requires no new organizational players, and is practical to deploy. This paper describes and analyzes the design, gives performance benchmarks, and presents our implementation and deployment across several hundred users.}, annote = {Tracking defense mechanisms. Analytics ohne Privatsph\"arengef\"ahrdung}, series = {{{CCS}} '12} } @misc{alabbasIndexedDatabaseAPI2020, title = {Indexed {{Database API}} 3.0}, author = {Alabbas, Ali and Bell, Joshua}, year = {2020}, month = mar, url = {https://w3c.github.io/IndexedDB/}, urldate = {2020-03-20}, note = {Accessed 2020-03-20} } @article{aonghusaDontLetGoogle2016, title = {Dont {{Let Google Know Im Lonely}}}, author = {Aonghusa, P{\'o}l Mac and Leith, Douglas J.}, year = {2016}, month = aug, volume = {19}, abstract = {From buying books to finding the perfect partner, we share our most intimate wants and needs with our favourite online systems. But how far should we accept promises of privacy in the face of perso...}, journal = {ACM Trans. Priv. Secur. TOPS}, number = {1} } @inproceedings{arefiPIITrackerAutomaticTracking2018, title = {{{PIITracker}}: {{Automatic Tracking}} of {{Personally Identifiable Information}} in {{Windows}}}, shorttitle = {{{PIITracker}}}, booktitle = {Proceedings of the 11th {{European Workshop}} on {{Systems Security}}}, author = {Arefi, Meisam Navaki and Alexander, Geoffrey and Crandall, Jedidiah R.}, year = {2018}, month = apr, pages = {1--6}, abstract = {Personally Identifiable Information (PII) is information that can be used on its own or with other information to distinguish or trace an individual's identity. To investigate an application for PII tracking, a reverse engineer has to put considerable effort to reverse engineer an application and discover what an application does with PII. 
To automate this process and save reverse engineers substantial time and effort, we propose PIITracker which is a new and novel tool that can track PII automatically and capture if any processes are sending PII over the network. This is made possible by 1) whole-system dynamic information flow tracking 2) monitoring specific function and system calls. We analyzed 15 popular chat applications and browsers using PIITracker, and determined that 12 of these applications collect some form of PII.}, series = {{{EuroSec}}'18} } @article{argyrosEvaluatingPrivacyGuarantees2017, title = {Evaluating the {{Privacy Guarantees}} of {{Location Proximity Services}}}, author = {Argyros, George and Petsios, Theofilos and Sivakorn, Suphannee and Keromytis, Angelos D. and Polakis, Jason}, year = {2017}, month = feb, volume = {19}, pages = {12:1--12:31}, abstract = {Location-based services have become an integral part of everyday life. To address the privacy issues that emerge from the use and sharing of location information, social networks and smartphone applications have adopted location proximity schemes as a means of balancing user privacy with utility. Unfortunately, despite the extensive academic literature on this topic, the schemes that large service providers have adopted are not always designed or implemented correctly, rendering users vulnerable to location-disclosure attacks. Such attacks have recently received major publicity as, in some cases, they even exposed citizens of oppressive regimes to life-threatening risks. In this article, we systematically assess the defenses that popular location-based services and mobile applications deploy to guard against adversaries seeking to identify a user's location. We provide the theoretical foundations for formalizing the privacy guarantees of currently adopted proximity models, design practical attacks for each case, and prove tight bounds on the number of queries required for carrying out successful attacks in practice. 
To evaluate the completeness of our approach, we conduct extensive experiments against popular services including Facebook, Foursquare, and Grindr. Our results demonstrate that, even though the aforementioned services implement various privacy-preserving techniques to protect their users, they are still vulnerable to attacks. In particular, we are able to pinpoint Facebook users within 5m of their exact location. For Foursquare and Grindr, users are pinpointed within 15m of their location in 90\% of the cases, even with the strictest privacy settings enabled. Our attacks are highly efficient and complete within a few seconds. The severity of our findings was acknowledged by Facebook and Foursquare, both of which have followed our recommendations and adopted our design of a safe proximity scheme in their production systems. As the number of mobile applications offering location functionality will continue to increase, service providers and software developers must be able to assess the privacy guarantees that their services offer. 
To that end, we discuss viable defenses that can be currently adopted by all major services, and provide an open-source testing framework to be used by researchers and service providers who wish to evaluate the privacy-preserving properties of applications offering proximity functionality.}, journal = {ACM Transactions on Privacy and Security (TOPS)}, number = {4} } @misc{ashkansoltaniFlashCookiesPrivacy2011, title = {Flash {{Cookies}} and {{Privacy II}}}, author = {{ashkansoltani}}, year = {2011}, month = aug, url = {https://ashkansoltani.org/2011/08/11/respawn-redux-flash-cookies/}, urldate = {2020-08-10}, abstract = {A detailed technical followup to Flash Cookies and Privacy II, describing the mechanisms behind Hulu/KISSmetrics' respawning practices I thought I'd take the time to elaborate a bit fur\ldots}, journal = {Ashkan Soltani}, note = {Accessed 2020-08-10} } @techreport{ayensonFlashCookiesPrivacy2011, title = {Flash {{Cookies}} and {{Privacy II}}: {{Now}} with {{HTML5}} and {{ETag Respawning}}}, shorttitle = {Flash {{Cookies}} and {{Privacy II}}}, author = {Ayenson, Mika D. and Wambach, Dietrich James and Soltani, Ashkan and Good, Nathan and Hoofnagle, Chris Jay}, year = {2011}, month = jul, institution = {{Social Science Research Network}}, url = {https://papers.ssrn.com/abstract=1898390}, urldate = {2020-02-13}, abstract = {In August 2009, we demonstrated that popular websites were using ``Flash cookies'' to track users. Some advertisers had adopted this technology because it allowed persistent tracking even where users had taken steps to avoid web profiling. We also demonstrated ``respawning'' on top sites with Flash technology. 
This allowed sites to reinstantiate HTTP cookies deleted by a user, making tracking more resistant to users' privacy-seeking behaviors.}, note = {Accessed 2020-02-13}, number = {ID 1898390}, type = {{{SSRN Scholarly Paper}}} } @inproceedings{bannihattikumarFindingChoiceHaystack2020, title = {Finding a {{Choice}} in a {{Haystack}}: {{Automatic Extraction}} of {{Opt}}-{{Out Statements}} from {{Privacy Policy Text}}}, shorttitle = {Finding a {{Choice}} in a {{Haystack}}}, booktitle = {Proceedings of {{The Web Conference}} 2020}, author = {Bannihatti Kumar, Vinayshekhar and Iyengar, Roger and Nisal, Namita and Feng, Yuanyuan and Habib, Hana and Story, Peter and Cherivirala, Sushain and Hagan, Margaret and Cranor, Lorrie and Wilson, Shomir and Schaub, Florian and Sadeh, Norman}, year = {2020}, month = apr, pages = {1943--1954}, abstract = {Website privacy policies sometimes provide users the option to opt-out of certain collections and uses of their personal data. Unfortunately, many privacy policies bury these instructions deep in their text, and few web users have the time or skill necessary to discover them. We describe a method for the automated detection of opt-out choices in privacy policy text and their presentation to users through a web browser extension. We describe the creation of two corpora of opt-out choices, which enable the training of classifiers to identify opt-outs in privacy policies. Our overall approach for extracting and classifying opt-out choices combines heuristics to identify commonly found opt-out hyperlinks with supervised machine learning to automatically identify less conspicuous instances. Our approach achieves a precision of 0.93 and a recall of 0.9. We introduce Opt-Out Easy, a web browser extension designed to present available opt-out choices to users as they browse the web. We evaluate the usability of our browser extension with a user study. 
We also present results of a large-scale analysis of opt-outs found in the text of thousands of the most popular websites.}, series = {{{WWW}} '20} } @misc{baronPreventingAttacksUser2010, title = {Preventing Attacks on a User's History through {{CSS}} :Visited Selectors}, author = {Baron, David}, year = {2010}, month = mar, url = {https://dbaron.org/mozilla/visited-privacy}, urldate = {2020-03-25}, journal = {dbaron.org}, note = {Accessed 2020-03-25} } @techreport{barthHTTPStateManagement2011, title = {{{HTTP State Management Mechanism}}}, author = {Barth, A.}, year = {2011}, month = apr, pages = {1--37}, institution = {{RFC Editor}}, url = {https://www.rfc-editor.org/info/rfc6265}, urldate = {2020-02-11}, abstract = {This document defines the HTTP Cookie and Set-Cookie header fields. These header fields can be used by HTTP servers to store state (called cookies) at HTTP user agents, letting the servers maintain a stateful session over the mostly stateless HTTP protocol. Although cookies have many historical infelicities that degrade their security and privacy, the Cookie and Set-Cookie header fields are widely used on the Internet. 
This document obsoletes RFC 2965.}, note = {Accessed 2020-02-11}, number = {6265}, type = {{{RFC}}} } @misc{barthThirdPartyCookies2011, title = {Third-{{Party Cookies}}}, author = {Barth {$<$}abarth@eecs.berkeley.edu{$>$}, Adam}, year = {2011}, month = apr, url = {https://tools.ietf.org/html/rfc6265\#section-7.1}, urldate = {2020-08-10}, note = {Accessed 2020-08-10} } @article{bashirDiffusionUserTracking2018, title = {Diffusion of {{User Tracking Data}} in the {{Online Advertising Ecosystem}}}, author = {Bashir, Muhammad Ahmad and Wilson, Christo}, year = {2018}, month = oct, volume = {2018}, pages = {85--103}, journal = {PoPETs}, number = {4} } @article{bashirTracingInformationFlows2018, title = {Tracing {{Information Flows Between Ad Exchanges Using Retargeted Ads}}}, author = {Bashir, Muhammad Ahmad and Arshad, Sajjad and Robertson, William and Wilson, Christo}, year = {2018}, month = nov, url = {http://arxiv.org/abs/1811.00920}, urldate = {2019-08-14}, abstract = {Numerous surveys have shown that Web users are concerned about the loss of privacy associated with online tracking. 
Alarmingly, these surveys also reveal that people are also unaware of the amount of data sharing that occurs between ad exchanges, and thus underestimate the privacy risks associated with online tracking.} } @misc{baumanEvercookieApplet2013, title = {Evercookie Applet}, author = {Bauman, Gabriel}, year = {2013}, month = apr, url = {https://github.com/gabrielbauman/evercookie-applet}, urldate = {2020-02-20}, copyright = {BSD-2-Clause}, note = {Accessed 2020-02-20} } @article{beckVisualAnalysisDissemination2016, title = {Visual {{Analysis}} and {{Dissemination}} of {{Scientific Literature Collections}} with {{SurVis}}}, author = {Beck, Fabian and Koch, Sebastian and Weiskopf, Daniel}, year = {2016}, month = jan, volume = {22}, pages = {180--189}, abstract = {Bibliographic data such as collections of scientific articles and citation networks have been studied extensively in information visualization and visual analytics research. Powerful systems have been built to support various types of bibliographic analysis, but they require some training and cannot be used to disseminate the insights gained. In contrast, we focused on developing a more accessible visual analytics system, called SurVis, that is ready to disseminate a carefully surveyed literature collection. The authors of a survey may use our Web-based system to structure and analyze their literature database. Later, readers of the survey can obtain an overview, quickly retrieve specific publications, and reproduce or extend the original bibliographic analysis. Our system employs a set of selectors that enable users to filter and browse the literature collection as well as to control interactive visualizations. The versatile selector concept includes selectors for textual search, filtering by keywords and meta-information, selection and clustering of similar publications, and following citation links. 
Agreement to the selector is represented by word-sized sparkline visualizations seamlessly integrated into the user interface. Based on an analysis of the analytical reasoning process, we derived requirements for the system. We developed the system in a formative way involving other researchers writing literature surveys. A questionnaire study with 14 visual analytics experts confirms that SurVis meets the initially formulated requirements.}, journal = {IEEE Transactions on Visualization and Computer Graphics}, number = {1} } @inproceedings{beigiProtectingUserPrivacy2019, title = {Protecting {{User Privacy}}: {{An Approach}} for {{Untraceable Web Browsing History}} and {{Unambiguous User Profiles}}}, shorttitle = {Protecting {{User Privacy}}}, booktitle = {Proceedings of the {{Twelfth ACM International Conference}} on {{Web Search}} and {{Data Mining}}}, author = {Beigi, Ghazaleh and Guo, Ruocheng and Nou, Alexander and Zhang, Yanchao and Liu, Huan}, year = {2019}, month = jan, pages = {213--221}, abstract = {The overturning of the Internet Privacy Rules by the Federal Communications Commissions (FCC) in late March 2017 allows Internet Service Providers (ISPs) to collect, share and sell their customers' Web browsing data without their consent. With third-party trackers embedded on Web pages, this new rule has put user privacy under more risk. The need arises for users on their own to protect their Web browsing history from any potential adversaries. Although some available solutions such as Tor, VPN, and HTTPS can help users conceal their online activities, their use can also significantly hamper personalized online services, i.e., degraded utility. In this paper, we design an effective Web browsing history anonymization scheme, PBooster, aiming to protect users' privacy while retaining the utility of their Web browsing history. 
The proposed model pollutes users' Web browsing history by automatically inferring how many and what links should be added to the history while addressing the utility-privacy trade-off challenge. We conduct experiments to validate the quality of the manipulated Web browsing history and examine the robustness of the proposed approach for user privacy protection.}, series = {{{WSDM}} '19} } @article{bellmanSiteOptinOptout2001, title = {On Site: To Opt-in or Opt-out? It Depends on the Question}, shorttitle = {On Site}, author = {Bellman, Steven and Johnson, Eric J. and Lohse, Gerald L.}, year = {2001}, month = feb, volume = {44}, pages = {25--27}, journal = {Communications of the ACM}, number = {2} } @article{belloroKnowWhatYou2018, title = {I {{Know What You Did Last Summer}}: {{New Persistent Tracking Mechanisms}} in the {{Wild}}}, shorttitle = {I {{Know What You Did Last Summer}}}, author = {Belloro, Stefano and Mylonas, Alexios}, year = {2018}, volume = {6}, pages = {52779--52792}, abstract = {As the usage of the Web increases, so do the threats an everyday user faces. One of the most pervasive threats a Web user faces is tracking, which enables an entity to gain unauthorized access to the user's personal data. Through the years, many client storage technologies, such as cookies, have been used for this purpose and have been extensively studied in the literature. The focus of this paper is on three newer client storage mechanisms, namely, Web Storage, Web SQL Database, and Indexed Database API. Initially, a large-scale analysis of their usage on the Web is conducted to appraise their usage in the wild. Then, this paper examines the extent that they are used for tracking purposes. The results suggest that Web Storage is the most used among the three technologies. More importantly, to the best of our knowledge, this paper is the first to suggest Web tracking as the main use case of these technologies. 
Motivated by these results, this paper examines whether popular desktop and mobile browsers protect their users from tracking mechanisms that use Web Storage, Web SQL Database, and Indexed Database. Our results uncover many cases where the relevant security controls are ineffective, thus making it virtually impossible for certain users to avoid tracking.}, annote = {Survey von Web Storage, Web SQL Database und IndexedDB}, journal = {IEEE Access} } @techreport{berners-leeUniformResourceLocators1994, title = {Uniform {{Resource Locators}} ({{URL}})}, author = {{Berners-Lee}, Timothy and Masinter, Larry and McCahill, Mark}, year = {1994}, month = dec, pages = {1--25}, institution = {{RFC Editor}}, url = {https://www.rfc-editor.org/info/rfc1738}, urldate = {2020-02-06}, abstract = {This document specifies a Uniform Resource Locator (URL), the syntax and semantics of formalized information for location and access of resources via the Internet.}, note = {Accessed 2020-02-06}, number = {1738}, type = {{{RFC}}} } @techreport{berners-leeUniversalResourceIdentifiers1994, title = {Universal {{Resource Identifiers}} in {{WWW}}: {{A Unifying Syntax}} for the {{Expression}} of {{Names}} and {{Addresses}} of {{Objects}} on the {{Network}} as Used in the {{World}}-{{Wide Web}}}, author = {{Berners-Lee}, Timothy}, year = {1994}, month = jun, pages = {1--28}, institution = {{RFC Editor}}, url = {https://www.rfc-editor.org/info/rfc1630}, urldate = {2020-02-06}, abstract = {This document defines the syntax used by the World-Wide Web initiative to encode the names and addresses of objects on the Internet. This memo provides information for the Internet community. 
This memo does not specify an Internet standard of any kind.}, note = {Accessed 2020-02-06}, number = {1630}, type = {{{RFC}}} } @inproceedings{bielovaWebTrackingTechnologies2017, title = {Web {{Tracking Technologies}} and {{Protection Mechanisms}}}, booktitle = {Proceedings of the 2017 {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Bielova, Nataliia}, year = {2017}, month = oct, pages = {2607--2609}, abstract = {Billions of users browse the Web on a daily basis, leaving their digital traces on millions of websites. Every such visit, every mouse move or button click may trigger a wide variety of hidden data exchanges across multiple tracking companies. As a result, these companies collect a vast amount of user's data, preferences and habits, that are extremely useful for online advertisers and profitable for data brokers, however very worrisome for the privacy of the users. In this \textbackslash emph\{3-hours tutorial\} we will cover the vide variety of Web tracking technologies, ranging from simple cookies to advanced cross-device fingerprinting. We will describe the main mechanisms behind web tracking and what users can do to protect themselves. Moreover, we will discuss solutions Web developers can use to automatically eliminate tracking from the third-party content they include in their applications. This tutorial will be of interest to a \textbackslash emph\{general audience\} of computer scientists, and \textbackslash emph\{we do not require any specific prerequisite knowledge\} for attendees. 
We will cover the following tracking mechanisms: \textbackslash begin\{itemize\} \textbackslash item third-party cookie tracking, and other stateful tracking techniques that enables tracking across multiple websites, \textbackslash item cookie respawning that is used to re-create deleted user cookies, \textbackslash item cookie synching that allows trackers and ad agencies to synchronise user IDs across different companies, \textbackslash item browser fingerprinting, including Canvas, WebRTC and AudioContext fingerprinting \textbackslash item cross-browser device fingerprinting, allowing trackers to recognise users across several devices. \textbackslash end\{itemize\} We will then demonstrate prevalence of such techniques on the Web, based on previous research. We will present the advertisement ecosystem and explain how Web technologies are used in advertisement, in particular in Real-Time-Bidding (RTB). We will explain how cookie synching is used in RTB and present recent analysis on how much a user's tracking data is worth. We will discuss the mechanisms the website owners use to automatically interact with the ad agencies, and explain its consequences on user's security and privacy. To help users protect themselves from Web tracking, we will give an overview of existing solutions. We'll start with the browser settings, and show that basic third-party cookie tracking is still possible even in the private browser mode of most common Web browsers. We then present privacy-protecting browser extensions and compare how efficient they are in protection from Web tracking. Then, we'll present possible protection mechanisms based on browser randomisation to protect from advanced fingerprinting techniques. Finally, we will present solutions for Web developers, who want to include third-party content in their websites, but would like to automatically remove any tracking of their users. 
In particular, we will discuss simple solutions that exist today for social plugins integration, and propose more advanced server-side based solutions that are a result of our own research.}, series = {{{CCS}} '17} } @article{brookmanCrossDeviceTrackingMeasurement2017, title = {Cross-{{Device Tracking}}: {{Measurement}} and {{Disclosures}}}, shorttitle = {Cross-{{Device Tracking}}}, author = {Brookman, Justin and Rouge, Phoebe and Alva, Aaron and Yeung, Christina}, year = {2017}, month = apr, volume = {2017}, pages = {133--148}, journal = {PoPETs}, number = {2} } @article{buhovFLASH20thCentury2018, title = {{{FLASH}}: {{Is}} the 20th {{Century Hero Really Gone}}? {{Large}}-{{Scale Evaluation}} on {{Flash Usage}} \& {{Its Security}} and {{Privacy Implications}}}, author = {Buhov, Damjan and Rauchberger, Julian and Schrittwieser, Sebastian}, year = {2018}, month = dec, volume = {9}, pages = {15}, abstract = {Although the Adobe Flash browser plugin steadily lost popularity throughout the last few years, Flash content still regularly appears when browsing the web. Known for its infamous security track record, Flash remains a challenge in making web browsing more secure. In this paper, we present a largescale measurement of the current uses of Flash, based on a crawl of the top 1 million websites. The different types of measurements result in most detailed classification of Flash uses to date. In particular, special attention is payed to Flash usage related to user tracking, as well as to malicious Flash files used by malvertising or exploit kits. We present Garrick, a novel crawling framework, which is based on a full-fledged Mozilla Firefox browser. Garrick is able to mimic any browser, plugin and operating system configuration so that fingerprinting scripts can be tricked to deliver malicious Flash files. 
Our measurements show that Flash is still used by approximately 7.5\% of the top 1 million websites, with 62\% of the Flash content coming from third-parties such as ad networks. In general, on popular websites Flash usage is higher compared to less prominent websites and a bigger share of Flash content on these sites comes from third-parties. From a security perspective, malicious Flash files served by highly targeted malvertising campaigns are an ongoing challenge.}, journal = {Journal of Wireless Mobile Networks, Ubiquitous Computing, and Dependable Applications}, number = {4} } @article{bujlowSurveyWebTracking2017, title = {A {{Survey}} on {{Web Tracking}}: {{Mechanisms}}, {{Implications}}, and {{Defenses}}}, shorttitle = {A {{Survey}} on {{Web Tracking}}}, author = {Bujlow, T. and {Carela-Espa{\~n}ol}, V. and {Sol{\'e}-Pareta}, J. and {Barlet-Ros}, P.}, year = {2017}, month = aug, volume = {105}, pages = {1476--1510}, abstract = {Privacy seems to be the Achilles' heel of today's web. Most web services make continuous efforts to track their users and to obtain as much personal information as they can from the things they search, the sites they visit, the people they contact, and the products they buy. This information is mostly used for commercial purposes, which go far beyond targeted advertising. Although many users are already aware of the privacy risks involved in the use of internet services, the particular methods and technologies used for tracking them are much less known. In this survey, we review the existing literature on the methods used by web services to track the users online as well as their purposes, implications, and possible user's defenses. We present five main groups of methods used for user tracking, which are based on sessions, client storage, client cache, fingerprinting, and other approaches. 
A special focus is placed on mechanisms that use web caches, operational caches, and fingerprinting, as they are usually very rich in terms of using various creative methodologies. We also show how the users can be identified on the web and associated with their real names, e-mail addresses, phone numbers, or even street addresses. We show why tracking is being used and its possible implications for the users. For each of the tracking methods, we present possible defenses. Some of them are specific to a particular tracking approach, while others are more universal (block more than one threat). Finally, we present the future trends in user tracking and show that they can potentially pose significant threats to the users' privacy.}, journal = {Proc. IEEE}, number = {8} } @inproceedings{cahnEmpiricalStudyWeb2016, title = {An {{Empirical Study}} of {{Web Cookies}}}, booktitle = {Proceedings of the 25th {{International Conference}} on {{World Wide Web}}}, author = {Cahn, Aaron and Alfeld, Scott and Barford, Paul and Muthukrishnan, S.}, year = {2016}, pages = {891--901}, abstract = {Web cookies are used widely by publishers and 3rd parties to track users and their behaviors. Despite the ubiquitous use of cookies, there is little prior work on their characteristics such as standard attributes, placement policies, and the knowledge that can be amassed via 3rd party cookies. In this paper, we present an empirical study of web cookie characteristics, placement practices and information transmission. To conduct this study, we implemented a lightweight web crawler that tracks and stores the cookies as it navigates to websites. We use this crawler to collect over 3.2M cookies from the two crawls, separated by 18 months, of the top 100K Alexa web sites. We report on the general cookie characteristics and add context via a cookie category index and website genre labels. 
We consider privacy implications by examining specific cookie attributes and placement behavior of 3rd party cookies. We find that 3rd party cookies outnumber 1st party cookies by a factor of two, and we illuminate the connection between domain genres and cookie attributes. We find that less than 1\% of the entities that place cookies can aggregate information across 75\% of web sites. Finally, we consider the issue of information transmission and aggregation by domains via 3rd party cookies. We develop a mathematical framework to quantify user information leakage for a broad class of users, and present findings using real world domains. In particular, we demonstrate the interplay between a domain's footprint across the Internet and the browsing behavior of users, which has significant impact on information transmission.}, series = {{{WWW}} '16} } @inproceedings{cahnWhatCommunityCookie2016, title = {What's in the {{Community Cookie Jar}}?}, booktitle = {Proceedings of the 2016 {{IEEE}}/{{ACM International Conference}} on {{Advances}} in {{Social Networks Analysis}} and {{Mining}}}, author = {Cahn, Aaron and Alfeld, Scott and Barford, Paul and Muthukrishnan, S.}, year = {2016}, month = aug, pages = {567--570}, abstract = {Third party tracking of user behavior via web cookies represents a privacy threat. In this paper we assess this threat through an analysis of anonymized, crowd-sourced cookie data provided by Cookiepedia.co.uk. We find that nearly 45\% of the cookies in the corpus are from Facebook and of the remaining cookies 25\% come from 10 distinct domains. Over 65\% are Maximal Permission cookies (i.e., 3rd party, non-secure, persistent, root-level). Cookiepedia's anonymization of user data presents challenges with respect to modeling site traffic. We further elucidate the privacy issue by conducting targeted crawling campaigns to supplement the Cookiepedia data. 
We find that the amount of traffic obscured by Cookiepedia's anonymizing procedure varies dramatically from site to site - sometimes obscuring as much as 80\% of traffic. We use the crawls to infer the inverse function of the anonymizing procedure, allowing us to enhance the crowd-sourced dataset while maintaining user anonymity.}, series = {{{ASONAM}} '16} } @misc{centerfordemocracytechnologyComplaintRequestInvestigation2017, title = {Complaint, {{Request}} for {{Investigation}}, {{Injunction}}, and {{Other Relief}}: {{AnchorFree}}, {{Inc}}. {{Hotspot Shield VPN}}}, author = {{Center for Democracy \& Technology}}, year = {2017}, month = aug, url = {https://cdt.org/wp-content/uploads/2017/08/FTC-CDT-VPN-complaint-8-7-17.pdf}, note = {Accessed 2020-08-10} } @inproceedings{chaabaneBigFriendWatching2012, title = {Big {{Friend}} Is {{Watching You}}: {{Analyzing Online Social Networks Tracking Capabilities}}}, shorttitle = {Big {{Friend}} Is {{Watching You}}}, booktitle = {Proceedings of the 2012 {{ACM Workshop}} on {{Online Social Networks}}}, author = {Chaabane, Abdelberi and Kaafar, Mohamed Ali and Boreli, Roksana}, year = {2012}, pages = {7--12}, abstract = {In this paper, we examine web user tracking capabilities of the three major global Online Social Networks (OSNs). We study the mechanisms which enable these services to persistently and accurately follow users web activity, and evaluate to which extent this phenomena is spread across the web. Through a study of the top 10K websites, our findings indicate that OSN tracking is diffused among almost all website categories, independently from the content and from the audience. We also evaluate the tracking capabilities in practice and demonstrate by analyzing a real traffic traces that OSNs can reconstruct a significant portion of users web profile and browsing history. 
We finally provide insights into the relation between the browsing history characteristics and the OSN tracking potential, highlighting the high risk properties.}, series = {{{WOSN}} '12} } @article{davidsonPrivacyPassBypassing2018, title = {Privacy {{Pass}}: {{Bypassing Internet Challenges Anonymously}}}, shorttitle = {Privacy {{Pass}}}, author = {Davidson, Alex and Goldberg, Ian and Sullivan, Nick and Tankersley, George and Valsorda, Filippo}, year = {2018}, month = jun, volume = {2018}, pages = {164--180}, abstract = {The growth of content delivery networks (CDNs) has engendered centralized control over the serving of internet content. An unwanted by-product of this growth is that CDNs are fast becoming global arbiters for which content requests are allowed and which are blocked in an attempt to stanch malicious traffic. In particular, in some cases honest users \textemdash{} especially those behind shared IP addresses, including users of privacy tools such as Tor, VPNs, and I2P \textemdash{} can be unfairly targeted by attempted `catch-all solutions' that assume these users are acting maliciously. In this work, we provide a solution to prevent users from being exposed to a disproportionate amount of internet challenges such as CAPTCHAs. These challenges are at the very least annoying and at their worst \textemdash{} when coupled with bad implementations \textemdash{} can completely block access from web resources. We detail a 1-RTT cryptographic protocol (based on an implementation of an oblivious pseudorandom function) that allows users to receive a significant amount of anonymous tokens for each challenge solution that they provide. These tokens can be exchanged in the future for access without having to interact with a challenge. We have implemented our initial solution in a browser extension named ``Privacy Pass'', and have worked with the Cloudflare CDN to deploy compatible server-side components in their infrastructure. 
However, we envisage that our solution could be used more generally for many applications where anonymous and honest access can be granted (e.g., anonymous wiki editing). The anonymity guarantee of our solution makes it immediately appropriate for use by users of Tor/VPNs/I2P. We also publish figures from Cloudflare indicating the potential impact from the global release of Privacy Pass.}, journal = {Proc. Priv. Enhancing Technol.}, language = {English}, number = {3} } @inproceedings{degelingWeValueYour2019, title = {We {{Value Your Privacy}} ... {{Now Take Some Cookies}}: {{Measuring}} the {{GDPR}}'s {{Impact}} on {{Web Privacy}}}, shorttitle = {We {{Value Your Privacy}} ... {{Now Take Some Cookies}}}, booktitle = {Proceedings 2019 {{Network}} and {{Distributed System Security Symposium}}}, author = {Degeling, Martin and Utz, Christine and Lentzsch, Christopher and Hosseini, Henry and Schaub, Florian and Holz, Thorsten}, year = {2019}, abstract = {The European Union's General Data Protection Regulation (GDPR) went into effect on May 25, 2018. Its privacy regulations apply to any service and company collecting or processing personal data in Europe. Many companies had to adjust their data handling processes, consent forms, and privacy policies to comply with the GDPR's transparency requirements. We monitored this rare event by analyzing changes on popular websites in all 28 member states of the European Union. For each country, we periodically examined its 500 most popular websites \textendash{} 6,579 in total \textendash{} for the presence of and updates to their privacy policy between December 2017 and October 2018. While many websites already had privacy policies, we find that in some countries up to 15.7 \% of websites added new privacy policies by May 25, 2018, resulting in 84.5 \% of websites having privacy policies. 72.6 \% of websites with existing privacy policies updated them close to the date. After May this positive development slowed down noticeably. 
Most visibly, 62.1 \% of websites in Europe now display cookie consent notices, 16 \% more than in January 2018. These notices inform users about a site's cookie use and user tracking practices. We categorized all observed cookie consent notices and evaluated 28 common implementations with respect to their technical realization of cookie consent. Our analysis shows that core web security mechanisms such as the same-origin policy pose problems for the implementation of consent according to GDPR rules, and opting out of third-party cookies requires the third party to cooperate. Overall, we conclude that the web became more transparent at the time GDPR came into force, but there is still a lack of both functional and usable mechanisms for users to consent to or deny processing of their personal data on the Internet.} } @misc{dingledineTorProtocolSpecifications, title = {Tor's Protocol Specifications - {{Path Specification}}}, author = {Dingledine, Roger and Mathewson, Nick}, url = {https://gitweb.torproject.org/torspec.git/tree/path-spec.txt}, urldate = {2020-07-14}, note = {Accessed 2020-07-14} } @misc{DuckDuckGoa, title = {{{DuckDuckGo}}}, url = {https://duckduckgo.com/}, urldate = {2020-07-10}, abstract = {The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.}, journal = {DuckDuckGo}, note = {Accessed 2020-07-10} } @misc{EasyList, title = {{{EasyList}}}, url = {https://easylist.to/}, urldate = {2020-07-12}, note = {Accessed 2020-07-12} } @article{enckTaintDroidInformationFlowTracking2014, title = {{{TaintDroid}}: {{An Information}}-{{Flow Tracking System}} for {{Realtime Privacy Monitoring}} on {{Smartphones}}}, shorttitle = {{{TaintDroid}}}, author = {Enck, William and Gilbert, Peter and Han, Seungyeop and Tendulkar, Vasant and Chun, Byung-Gon and Cox, Landon P. 
and Jung, Jaeyeon and McDaniel, Patrick and Sheth, Anmol N.}, year = {2014}, month = jun, volume = {32}, pages = {5:1--5:29}, abstract = {Today's smartphone operating systems frequently fail to provide users with visibility into how third-party applications collect and share their private data. We address these shortcomings with TaintDroid, an efficient, system-wide dynamic taint tracking and analysis system capable of simultaneously tracking multiple sources of sensitive data. TaintDroid enables realtime analysis by leveraging Android's virtualized execution environment. TaintDroid incurs only 32\% performance overhead on a CPU-bound microbenchmark and imposes negligible overhead on interactive third-party applications. Using TaintDroid to monitor the behavior of 30 popular third-party Android applications, in our 2010 study we found 20 applications potentially misused users' private information; so did a similar fraction of the tested applications in our 2012 study. Monitoring the flow of privacy-sensitive data with TaintDroid provides valuable input for smartphone users and security service firms seeking to identify misbehaving applications.}, journal = {ACM Transactions on Computer Systems (TOCS)}, number = {2} } @inproceedings{englehardtCookiesThatGive2015, title = {Cookies {{That Give You Away}}: {{The Surveillance Implications}} of {{Web Tracking}}}, shorttitle = {Cookies {{That Give You Away}}}, booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, author = {Englehardt, Steven and Reisman, Dillon and Eubank, Christian and Zimmerman, Peter and Mayer, Jonathan and Narayanan, Arvind and Felten, Edward W.}, year = {2015}, month = may, pages = {289--299}, abstract = {We study the ability of a passive eavesdropper to leverage "third-party" HTTP tracking cookies for mass surveillance. 
If two web pages embed the same tracker which tags the browser with a unique cookie, then the adversary can link visits to those pages from the same user (i.e., browser instance) even if the user's IP address varies. Further, many popular websites leak a logged-in user's identity to an eavesdropper in unencrypted traffic. To evaluate the effectiveness of our attack, we introduce a methodology that combines web measurement and network measurement. Using OpenWPM, our web privacy measurement platform, we simulate users browsing the web and find that the adversary can reconstruct 62-73\% of a typical user's browsing history. We then analyze the effect of the physical location of the wiretap as well as legal restrictions such as the NSA's "one-end foreign" rule. Using measurement units in various locations - Asia, Europe, and the United States - we show that foreign users are highly vulnerable to the NSA's dragnet surveillance due to the concentration of third-party trackers in the U.S. Finally, we find that some browser-based privacy tools mitigate the attack while others are largely ineffective.}, series = {{{WWW}} '15} } @inproceedings{englehardtOnlineTracking1MillionSite2016, title = {Online {{Tracking}}: {{A}} 1-{{Million}}-{{Site Measurement}} and {{Analysis}}}, shorttitle = {Online {{Tracking}}}, booktitle = {Proceedings of the 2016 {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Englehardt, Steven and Narayanan, Arvind}, year = {2016}, month = oct, pages = {1388--1401}, abstract = {We present the largest and most detailed measurement of online tracking conducted to date, based on a crawl of the top 1 million websites. We make 15 types of measurements on each site, including stateful (cookie-based) and stateless (fingerprinting-based) tracking, the effect of browser privacy tools, and the exchange of tracking data between different sites ("cookie syncing"). 
Our findings include multiple sophisticated fingerprinting techniques never before measured in the wild. This measurement is made possible by our open-source web privacy measurement tool, OpenWPM, which uses an automated version of a full-fledged consumer browser. It supports parallelism for speed and scale, automatic recovery from failures of the underlying browser, and comprehensive browser instrumentation. We demonstrate our platform's strength in enabling researchers to rapidly detect, quantify, and characterize emerging online tracking behaviors.}, series = {{{CCS}} '16} } @inproceedings{falahrastegarTrackingPersonalIdentifiers2016, title = {Tracking {{Personal Identifiers Across}} the {{Web}}}, booktitle = {Passive and {{Active Measurement}}}, author = {Falahrastegar, Marjan and Haddadi, Hamed and Uhlig, Steve and Mortier, Richard}, editor = {Karagiannis, Thomas and Dimitropoulos, Xenofontas}, year = {2016}, pages = {30--41}, abstract = {User tracking has become de facto practice of the Web, however, our understanding of the scale and nature of this practice remains rudimentary. In this paper, we explore the connections amongst all parties of the Web, especially focusing on how trackers share user IDs. Using data collected from both browsing histories of 129 users and active experiments, we identify user-specific IDs that we suspect are used to track users. We find a significant amount of ID-sharing practices across different organisations providing various service categories. Our observations reveal that ID-sharing happens in a large scale regardless of the user profile size and profile condition such as logged-in and logged-out. We unexpectedly observe a higher number of ID-sharing domains when user is logged-out. 
We believe that our work reveals the huge gap between what is known about user tracking and what is done by this complex and important ecosystem.}, series = {Lecture {{Notes}} in {{Computer Science}}} } @inproceedings{feltenTimingAttacksWeb2000, title = {Timing Attacks on {{Web}} Privacy}, booktitle = {Proceedings of the 7th {{ACM}} Conference on {{Computer}} and {{Communications Security}}}, author = {Felten, Edward W. and Schneider, Michael A.}, year = {2000}, month = nov, pages = {25--32}, annote = {DNS cache timing attacks \par Web cache timing attacks}, series = {{{CCS}} '00} } @misc{fieldingHTTPETag, title = {{{HTTP}}/1.1: {{ETag}}}, shorttitle = {Hypertext {{Transfer Protocol}} ({{HTTP}}/1.1)}, author = {Fielding, Roy and Reschke, Julian}, url = {https://tools.ietf.org/html/rfc7232\#section-2.3}, urldate = {2020-08-25} } @misc{fieldingHTTPLastModified, title = {{{HTTP}}/1.1: {{Last}}-{{Modified}}}, shorttitle = {Hypertext {{Transfer Protocol}} ({{HTTP}}/1.1)}, author = {Fielding, Roy and Reschke, Julian}, url = {https://tools.ietf.org/html/rfc7232\#section-2.2}, urldate = {2020-08-25} } @misc{fieldingHTTPSemanticsContent2014, title = {{{HTTP}}: {{Semantics}} and {{Content}} - {{Referer}}}, shorttitle = {Hypertext {{Transfer Protocol}} ({{HTTP}}/1.1)}, author = {Fielding, Roy and Reschke, Julian}, year = {2014}, month = jun, url = {https://tools.ietf.org/html/rfc7231\#section-5.5.2}, urldate = {2020-08-10}, note = {Accessed 2020-08-10} } @article{frankenExposingCookiePolicy2019, title = {Exposing {{Cookie Policy Flaws Through}} an {{Extensive Evaluation}} of {{Browsers}} and {{Their Extensions}}}, author = {Franken, Gertjan and Van Goethem, Tom and Joosen, Wouter}, year = {2019}, month = jul, volume = {17}, pages = {25--34}, abstract = {Online abuses give browser users an incentive to employ third-party cookie policies. These policies, built directly into the browser or provided through extensions, are intended to enhance the user's security and privacy. 
Unfortunately, virtually every policy can be bypassed.}, journal = {IEEE Security \& Privacy}, number = {4} } @inproceedings{frankenWhoLeftOpen2018, title = {Who {{Left Open}} the {{Cookie Jar}}? {{A Comprehensive Evaluation}} of {{Third}}-{{Party Cookie Policies}}}, shorttitle = {Who {{Left Open}} the {{Cookie Jar}}?}, booktitle = {27th {{USENIX Security Symposium}} ({{USENIX Security}} 18)}, author = {Franken, Gertjan and Van Goethem, Tom and Joosen, Wouter}, year = {2018}, pages = {151--168}, url = {https://www.usenix.org/conference/usenixsecurity18/presentation/franken}, urldate = {2020-02-05} } @misc{frankSessionVariablesCookies2008, title = {Session Variables without Cookies}, author = {Frank, Thomas}, year = {2008}, month = jan, url = {https://www.thomasfrank.se/sessionvars.html}, urldate = {2020-02-10} } @article{gerberInvestigatingPeoplePrivacy2019, title = {Investigating {{People}}'s {{Privacy Risk Perception}}}, author = {Gerber, Nina and Reinheimer, Benjamin and Volkamer, Melanie}, year = {2019}, month = jul, volume = {2019}, pages = {267--288}, journal = {Proc. Priv. Enhancing Technol.}, number = {3} } @article{gongAttributeInferenceAttacks2018, title = {Attribute {{Inference Attacks}} in {{Online Social Networks}}}, author = {Gong, Neil Zhenqiang and Liu, Bin}, year = {2018}, month = jan, volume = {21}, pages = {3:1--3:30}, abstract = {We propose new privacy attacks to infer attributes (e.g., locations, occupations, and interests) of online social network users. Our attacks leverage seemingly innocent user information that is publicly available in online social networks to infer missing attributes of targeted users. 
Given the increasing availability of (seemingly innocent) user information online, our results have serious implications for Internet privacy\textemdash private attributes can be inferred from users' publicly available data unless we take steps to protect users from such inference attacks. To infer attributes of a targeted user, existing inference attacks leverage either the user's publicly available social friends or the user's behavioral records (e.g., the web pages that the user has liked on Facebook, the apps that the user has reviewed on Google Play), but not both. As we will show, such inference attacks achieve limited success rates. However, the problem becomes qualitatively different if we consider both social friends and behavioral records. To address this challenge, we develop a novel model to integrate social friends and behavioral records, and design new attacks based on our model. We theoretically and experimentally demonstrate the effectiveness of our attacks. For instance, we observe that, in a real-world large-scale dataset with 1.1 million users, our attack can correctly infer the cities a user lived in for 57\% of the users; via confidence estimation, we are able to increase the attack success rate to over 90\% if the attacker selectively attacks half of the users. 
Moreover, we show that our attack can correctly infer attributes for significantly more users than previous attacks.}, journal = {ACM Transactions on Privacy and Security (TOPS)}, number = {1} } @inproceedings{gonzalezCookieRecipeUntangling2017, title = {The Cookie Recipe: {{Untangling}} the Use of Cookies in the Wild}, shorttitle = {The Cookie Recipe}, booktitle = {2017 {{Network Traffic Measurement}} and {{Analysis Conference}} ({{TMA}})}, author = {Gonzalez, Roberto and Jiang, Lili and Ahmed, Mohamed and Marciel, Miriam and Cuevas, Ruben and Metwalley, Hassan and Niccolini, Saverio}, year = {2017}, month = jun, pages = {1--9}, abstract = {Users online are commonly tracked using HTTP cookies when browsing on the web. To protect their privacy, users tend to use simple tools to block the activity of HTTP cookies. However, the ``block all'' design of tools breaks critical web services or severely limits the online advertising ecosystem. Therefore, to ease this tension, a more nuanced strategy that discerns better the intended functionality of the HTTP cookies users encounter is required. We present the first large-scale study of the use of HTTP cookies in the wild using network traces containing more than 5.6 billion HTTP requests from real users for a period of two and a half months. We first present a statistical analysis of how cookies are used. We then analyze the structure of cookies and observe that; HTTP cookies are significantly more sophisticated than the name=value defined by the standard and assumed by researchers and developers. Based on our findings we present an algorithm that is able to extract the information included in 86\% of the cookies in our dataset with an accuracy of 91.7\%. 
Finally, we discuss the implications of our findings and provide solutions that can be used to improve the most promising privacy preserving tools.}, language = {en} } @misc{googleinc.CookieMatchingRealtime2020, title = {Cookie {{Matching}} | {{Real}}-Time {{Bidding}}}, author = {{Google Inc.}}, year = {2020}, month = jun, url = {https://developers.google.com/authorized-buyers/rtb/cookie-guide}, urldate = {2020-08-10}, journal = {Google Developers}, note = {Accessed 2020-08-10} } @article{gugelmannAutomatedApproachComplementing2015, title = {An {{Automated Approach}} for {{Complementing Ad Blockers}}' {{Blacklists}}}, author = {Gugelmann, David and Happe, Markus and Ager, Bernhard and Lenders, Vincent}, year = {2015}, month = jun, volume = {2015}, pages = {282--298}, journal = {Proc. Priv. Enhancing Technol.}, number = {2} } @inproceedings{habibItScavengerHunt2020, title = {``{{It}}'s a Scavenger Hunt'': {{Usability}} of {{Websites}}' {{Opt}}-{{Out}} and {{Data Deletion Choices}}}, shorttitle = {``{{It}}'s a Scavenger Hunt''}, booktitle = {Proceedings of the 2020 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, author = {Habib, Hana and Pearman, Sarah and Wang, Jiamin and Zou, Yixin and Acquisti, Alessandro and Cranor, Lorrie Faith and Sadeh, Norman and Schaub, Florian}, year = {2020}, month = apr, pages = {1--12}, abstract = {We conducted an in-lab user study with 24 participants to explore the usefulness and usability of privacy choices offered by websites. Participants were asked to find and use choices related to email marketing, targeted advertising, or data deletion on a set of nine websites that differed in terms of where and how these choices were presented. They struggled with several aspects of the interaction, such as selecting the correct page from a site's navigation menu and understanding what information to include in written opt-out requests. 
Participants found mechanisms located in account settings pages easier to use than options contained in privacy policies, but many still consulted help pages or sent email to request assistance. Our findings indicate that, despite their prevalence, privacy choices like those examined in this study are difficult for consumers to exercise in practice. We provide design and policy recommendations for making these website opt-out and deletion choices more useful and usable for consumers.}, series = {{{CHI}} '20} } @misc{hicksonWebSQLDatabase2010, title = {Web {{SQL Database}}}, author = {Hickson, Ian and {Google Inc.}}, year = {2010}, month = nov, url = {https://www.w3.org/TR/webdatabase/}, urldate = {2020-03-20}, note = {Accessed 2020-03-20} } @misc{hillGorhillUBlock2020, title = {Gorhill/{{uBlock}}}, author = {Hill, Raymond}, year = {2020}, month = jul, url = {https://github.com/gorhill/uBlock}, urldate = {2020-07-12}, abstract = {uBlock Origin}, copyright = {GPL-3.0 License}, note = {Accessed 2020-07-12} } @inproceedings{huCharacterisingThirdParty2019, title = {Characterising {{Third Party Cookie Usage}} in the {{EU}} after {{GDPR}}}, author = {Hu, Xuehui and Sastry, Nishanth}, year = {2019}, pages = {137--141}, abstract = {The recently introduced General Data Protection Regulation (GDPR) requires that when obtaining information online that could be used to identify individuals, their consents must be obtained. Among other things, this affects many common forms of cookies, and users in the EU have been presented with notices asking their approvals for data collection. This paper examines the prevalence of third party cookies before and after GDPR by using two datasets: accesses to top 500 websites according to Alexa.com, and weekly data of cookies placed in users' browsers by websites accessed by 16 UK and China users across one year.}, booktitle = {Proc. 10th ACM Conf. Web Sci. 
- WebSci 19} } @article{ikramSeamlessTrackingFreeWeb2016, title = {Towards {{Seamless Tracking}}-{{Free Web}}: {{Improved Detection}} of {{Trackers}} via {{One}}-{{Class Learning}}}, shorttitle = {Towards {{Seamless Tracking}}-{{Free Web}}}, author = {Ikram, Muhammad and Asghar, Hassan Jameel and Kaafar, Mohamed Ali and Krishnamurthy, Balachander and Mahanti, Anirban}, year = {2016}, month = mar, url = {http://arxiv.org/abs/1603.06289}, urldate = {2019-08-14}, eprint = {1603.06289}, archiveprefix = {arXiv}, abstract = {Numerous tools have been developed to aggressively block the execution of popular JavaScript programs in Web browsers. Such blocking also affects functionality of webpages and impairs user experience. As a consequence, many privacy preserving tools that have been developed to limit online tracking, often executed via JavaScript programs, may suffer from poor performance and limited uptake. A mechanism that can isolate JavaScript programs necessary for proper functioning of the website from tracking JavaScript programs would thus be useful. Through the use of a manually labelled dataset composed of 2,612 JavaScript programs, we show how current privacy preserving tools are ineffective in finding the right balance between blocking tracking JavaScript programs and allowing functional JavaScript code. To the best of our knowledge, this is the first study to assess the performance of current web privacy preserving tools.}, language = {English} } @misc{InputFormInput, title = {{$<$}input{$>$}: {{The Input}} ({{Form Input}}) {{Element}}: {$<$}input Type="hidden"{$>$}}, url = {https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/hidden}, urldate = {2020-02-08}, abstract = {input elements of type "hidden" let web developers include data that cannot be seen or modified by users when a form is submitted. For example, the ID of the content that is currently being ordered or edited, or a unique security token. 
Hidden inputs are completely invisible in the rendered page, and there is no way to make it visible in the page's content.}, journal = {MDN Web Docs}, note = {Accessed 2020-02-08} } @inproceedings{iordanouTracingCrossBorder2018, title = {Tracing {{Cross Border Web Tracking}}}, booktitle = {Proceedings of the {{Internet Measurement Conference}} 2018}, author = {Iordanou, Costas and Smaragdakis, Georgios and Poese, Ingmar and Laoutaris, Nikolaos}, year = {2018}, month = oct, pages = {329--342}, abstract = {A tracking flow is a flow between an end user and a Web tracking service. We develop an extensive measurement methodology for quantifying at scale the amount of tracking flows that cross data protection borders, be it national or international, such as the EU28 border within which the General Data Protection Regulation (GDPR) applies. Our methodology uses a browser extension to fully render advertising and tracking code, various lists and heuristics to extract well known trackers, passive DNS replication to get all the IP ranges of trackers, and state-of-the art geolocation. We employ our methodology on a dataset from 350 real users of the browser extension over a period of more than four months, and then generalize our results by analyzing billions of web tracking flows from more than 60 million broadband and mobile users from 4 large European ISPs. We show that the majority of tracking flows cross national borders in Europe but, unlike popular belief, are pretty well confined within the larger GDPR jurisdiction. Simple DNS redirection and PoP mirroring can increase national confinement while sealing almost all tracking flows within Europe. 
Last, we show that cross-border tracking is prevalent even in sensitive and hence protected data categories and groups including health, sexual orientation, minors, and others.}, series = {{{IMC}} '18} } @article{iordanouWhoTrackingSensitive2019, title = {Who's {{Tracking Sensitive Domains}}?}, author = {Iordanou, Costas and Smaragdakis, Georgios and Laoutaris, Nikolaos}, year = {2019}, month = aug, url = {http://arxiv.org/abs/1908.02261}, urldate = {2019-08-14}, eprint = {1908.02261}, archiveprefix = {arXiv}, abstract = {We turn our attention to the elephant in the room of data protection, which is none other than the simple and obvious question: ``Who's tracking sensitive domains?''. Despite a fast-growing amount of work on more complex facets of the interplay between privacy and the business models of the Web, the obvious question of who collects data on domains where most people would prefer not be seen, has received rather limited attention. First, we develop a methodology for automatically annotating websites that belong to a sensitive category, e.g., as defined by the General Data Protection Regulation (GDPR). Then, we extract the third party tracking services included directly, or via recursive inclusions, by the above mentioned sites. Having analyzed around 30k sensitive domains, we show that such domains are tracked, albeit less intensely than the mainstream ones. Looking in detail at the tracking services operating on them, we find well known names, as well as some less known ones, including some specializing on specific sensitive categories.} } @article{iqbalAdGraphGraphBasedApproach2018, title = {{{AdGraph}}: {{A Graph}}-{{Based Approach}} to {{Ad}} and {{Tracker Blocking}}}, shorttitle = {{{AdGraph}}}, author = {Iqbal, Umar and Snyder, Peter and Zhu, Shitong and Livshits, Benjamin and Qian, Zhiyun and Shafiq, Zubair}, year = {2018}, month = may, url = {http://arxiv.org/abs/1805.09155}, urldate = {2019-08-14}, eprint = {1805.09155}, archiveprefix = {arXiv}, abstract = {User demand for blocking advertising and tracking online is large and growing. 
Existing tools, both deployed and described in research, have proven useful, but lack either the completeness or robustness needed for a general solution. Existing detection approaches generally focus on only one aspect of advertising or tracking (e.g. URL patterns, code structure), making existing approaches susceptible to evasion.} } @inproceedings{iqbalAdWarsRetrospective2017, title = {The {{Ad Wars}}: {{Retrospective Measurement}} and {{Analysis}} of {{Anti}}-{{Adblock Filter Lists}}}, shorttitle = {The {{Ad Wars}}}, booktitle = {Proceedings of the 2017 {{Internet Measurement Conference}}}, author = {Iqbal, Umar and Shafiq, Zubair and Qian, Zhiyun}, year = {2017}, month = nov, pages = {171--183}, abstract = {The increasing popularity of adblockers has prompted online publishers to retaliate against adblock users by deploying anti-adblock scripts, which detect adblock users and bar them from accessing content unless they disable their adblocker. To circumvent anti-adblockers, adblockers rely on manually curated anti-adblock filter lists for removing anti-adblock scripts. Anti-adblock filter lists currently rely on informal crowdsourced feedback from users to add/remove filter list rules. In this paper, we present the first comprehensive study of anti-adblock filter lists to analyze their effectiveness against anti-adblockers. Specifically, we compare and contrast the evolution of two popular anti-adblock filter lists. We show that these filter lists are implemented very differently even though they currently have a comparable number of filter list rules. We then use the Internet Archive's Wayback Machine to conduct a retrospective coverage analysis of these filter lists on Alexa top-5K websites over the span of last five years. We find that the coverage of these filter lists has considerably improved since 2014 and they detect anti-adblockers on about 9\% of Alexa top-5K websites. 
To improve filter list coverage and speedup addition of new filter rules, we also design and implement a machine learning based method to automatically detect anti-adblock scripts using static JavaScript code analysis.}, series = {{{IMC}} '17} } @inproceedings{jacksonProtectingBrowserState2006, title = {Protecting Browser State from Web Privacy Attacks}, booktitle = {Proceedings of the 15th International Conference on {{World Wide Web}}}, author = {Jackson, Collin and Bortz, Andrew and Boneh, Dan and Mitchell, John C.}, year = {2006}, month = may, pages = {737--744}, abstract = {Through a variety of means, including a range of browser cache methods and inspecting the color of a visited hyperlink, client-side browser state can be exploited to track users against their wishes. This tracking is possible because persistent, client-side browser state is not properly partitioned on per-site basis in current browsers. We address this problem by refining the general notion of a "same-origin" policy and implementing two browser extensions that enforce this policy on the browser cache and visited links. We also analyze various degrees of cooperation between sites to track users, and show that even if long-term browser state is properly partitioned, it is still possible for sites to use modern web features to bounce users between sites and invisibly engage in cross-domain tracking of their visitors. 
Cooperative privacy attacks are an unavoidable consequence of all persistent browser state that affects the behavior of the browser, and disabling or frequently expiring this state is the only way to achieve true privacy against colluding parties.}, annote = {Cache control directives (ETags) \par Cache timing attacks \par Cached content \par Using web caches to circumvent same-origin policy}, series = {{{WWW}} '06} } @inproceedings{jancWebBrowserHistory2010, title = {Web {{Browser History Detection}} as a {{Real}}-{{World Privacy Threat}}}, booktitle = {Computer {{Security}} \textendash{} {{ESORICS}} 2010}, author = {Janc, Artur and Olejnik, Lukasz}, editor = {Gritzalis, Dimitris and Preneel, Bart and Theoharidou, Marianthi}, year = {2010}, pages = {215--231}, abstract = {Web browser history detection using CSS visited styles has long been dismissed as an issue of marginal impact. However, due to recent changes in Web usage patterns, coupled with browser performance improvements, the long-standing issue has now become a significant threat to the privacy of Internet users. In this paper we analyze the impact of CSS-based history detection and demonstrate the feasibility of conducting practical attacks with minimal resources. We analyze Web browser behavior and detectability of content loaded via standard protocols and with various HTTP response codes. We develop an algorithm for efficient examination of large link sets and evaluate its performance in modern browsers. Compared to existing methods our approach is up to 6 times faster, and is able to detect up to 30,000 visited links per second. We present a novel Web application capable of effectively detecting clients' browsing histories and discuss real-world results obtained from 271,576 Internet users. 
Our results indicate that at least 76\% of Internet users are vulnerable to history detection, including over 94\% of Google Chrome users; for a test of most popular Internet websites we were able to detect, on average, 62.6 (median 22) visited locations per client. We also demonstrate the potential to profile users based on social news stories they visited, and to detect private data such as zipcodes or search queries typed into online forms.}, annote = {Analysis of CSS :visited selector \par Impact of CSS-based history detection}, series = {Lecture {{Notes}} in {{Computer Science}}} } @article{johansenMakingGDPRUsable2019, title = {Making {{GDPR Usable}}: {{A Model}} to {{Support Usability Evaluations}} of {{Privacy}}}, shorttitle = {Making {{GDPR Usable}}}, author = {Johansen, Johanna}, year = {2019}, month = aug, url = {http://arxiv.org/abs/1908.03503}, urldate = {2019-08-14}, abstract = {We introduce a new perspective on the evaluation of privacy, where rights of the data subjects, privacy principles, and usability criteria are intertwined. This new perspective is visually represented through a cube where each of its three axes of variability captures, respectively: principles, rights, and usability criteria. In this way, our model, called Usable Privacy Cube (or UP Cube), brings out two perspectives on privacy: that of the data subjects and that of the controllers/processors. In the long run, the UP Cube is meant to be the model behind a new certification methodology capable of evaluating the usability of privacy. Our research builds on the criteria proposed by the EuroPriSe certification scheme by adding usability criteria to their evaluation. We slightly reorganize the criteria of EuroPriSe to fit with the UP Cube model, i.e., we show how the EuroPriSe can be viewed as a combination of only principles and rights, forming the basis of the UP Cube. 
Usability criteria are defined based on goals that we extract from the data protection regulations, at the same time considering the needs, goals and characteristics of different types of users and their context of use. The criteria are designed to produce measurements of the level of usability with which the privacy goals of the data protection are reached. Considering usability criteria allows for greater business differentiation, beyond GDPR compliance.}, language = {English} } @inproceedings{juelsCacheCookiesBrowser2006, title = {Cache Cookies for Browser Authentication}, booktitle = {2006 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{S\&P}}'06)}, author = {Juels, A. and Jakobsson, M. and Jagatic, T. N.}, year = {2006}, month = may, pages = {301--305}, abstract = {Like conventional cookies, cache cookies are data objects that servers store in Web browsers. Cache cookies, however, are unintentional byproducts of protocol design for browser caches. They do not enjoy any explicit interface support or security policies. In this paper, we show that despite limitations, cache cookies can play a useful role in the identification and authentication of users. Many users today block conventional cookies in their browsers as a privacy measure. The cache-cookie tools we propose can help restore lost usability and convenience to such users while maintaining good privacy. As we show, our techniques can also help combat online security threats such as phishing and pharming that ordinary cookies cannot. The ideas we introduce for cache-cookie management can strengthen ordinary cookies as well. The full version of this paper may be referenced at www.ravenwhite.com} } @inproceedings{kaizerCharacterizingWebsiteBehaviors2016, title = {Characterizing {{Website Behaviors Across Logged}}-in and {{Not}}-Logged-in {{Users}}}, booktitle = {Proceedings of the 2016 {{Internet Measurement Conference}}}, author = {Kaizer, Andrew J. 
and Gupta, Minaxi}, year = {2016}, month = nov, pages = {111--117}, abstract = {Users on today's Internet are subjected to a barrage of advertising and privacy concerning practices. However there is a gap in understanding the treatment of not-logged-in and logged-in users on websites. To address this gap in understanding, we create accounts and crawl 345 popular websites from 14 Alexa website categories while both not-logged-in and logged-in to determine -- for the first time -- how users are treated when logged-in versus not-logged-in with respect to the types and rate of ads and privacy concerns. We establish that logged-in users are treated to more ads and more privacy concerns on average and the website category greatly impacts the amount of ads/concerns users are subjected to. We also note that 42\% of website crawled leaked PII and identify that age/gender/zipcode are more valued than user/first/last name personal information. Finally, we observe that 463 unique third parties received at least one piece of PII, indicating that user PII is leaked more aggressively than previously known.}, series = {{{IMC}} '16} } @article{kakhkiInformationMarketWeb2019, title = {Information {{Market}} for {{Web Browsing}}: {{Design}}, {{Usability}} and {{Incremental Adoption}}}, shorttitle = {Information {{Market}} for {{Web Browsing}}}, author = {Kakhki, Arash Molavi and Erramilli, Vijay and Gill, Phillipa and Chaintreau, Augustin and Krishnamurthy, Balachander}, year = {2019}, month = jan, volume = {46}, pages = {24}, abstract = {Browsing privacy solutions are faced with an uphill battle to deployment. Many operate counter to the economic objectives of popular online services (e.g., by completely blocking ads) and do not provide enough incentive for users who may be subject to performance degradation for deploying them. 
In this study, we take a step towards realizing a system for online privacy that is mutually beneficial to users and online advertisers: an information market. This system not only maintains economic viability for online services, but provides users with financial compensation to encourage them to participate. We prototype and evaluate an information market that provides privacy and revenue to users while preserving and sometimes improving their Web performance. We evaluate feasibility of the market via a one month field study with 63 users and find that users are indeed willing to sell their browsing information. We also use Web traces of millions of users to drive a simulation study to evaluate the system at scale. We find that the system can indeed be profitable to both users and online advertisers.}, journal = {ACM SIGMETRICS Performance Evaluation Review}, number = {3} } @misc{kamkarEvercookieVirtuallyIrrevocable2010, title = {Evercookie - Virtually Irrevocable Persistent Cookies}, author = {Kamkar, Samy}, year = {2010}, month = sep, url = {https://samy.pl/evercookie/}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @misc{kamkarSamykEvercookie2020, title = {Samyk/Evercookie}, author = {Kamkar, Samy}, year = {2020}, month = feb, url = {https://github.com/samyk/evercookie}, urldate = {2020-02-27}, abstract = {Produces persistent, respawning ``super'' cookies in a browser, abusing over a dozen techniques. 
Its goal is to identify users after they've removed standard cookies and other privacy d...}, note = {Accessed 2020-02-27} } @article{karajWhoTracksMeShedding2019, title = {{{WhoTracks.Me}}: {{Shedding}} Light on the Opaque World of Online Tracking}, shorttitle = {{{WhoTracks.Me}}}, author = {Karaj, Arjaldo and Macbeth, Sam and Berson, R{\'e}mi and Pujol, Josep M.}, year = {2019}, month = apr, url = {http://arxiv.org/abs/1804.08959}, urldate = {2020-02-05}, abstract = {Online tracking has become of increasing concern in recent years, however our understanding of its extent to date has been limited to snapshots from web crawls. Previous attempts to measure the tracking ecosystem, have been done using instrumented measurement platforms, which are not able to accurately capture how people interact with the web. In this work we present a method for the measurement of tracking in the web through a browser extension, as well as a method for the aggregation and collection of this information which protects the privacy of participants. We deployed this extension to more than 5 million users, enabling measurement across multiple countries, ISPs and browser configurations, to give an accurate picture of real-world tracking. The result is the largest and longest measurement of online tracking to date based on real users, covering 1.5 billion page loads gathered over 12 months. The data, detailing tracking behaviour over a year, is made publicly available to help drive transparency around online tracking practices.}, annote = {Comment: 15 pages, 12 figures}, archivePrefix = {arXiv}, journal = {arXiv:1804.08959 [cs]}, primaryClass = {cs} } @inproceedings{khanEmpiricalAnalysisCommercial2018, title = {An {{Empirical Analysis}} of the {{Commercial VPN Ecosystem}}}, booktitle = {Proceedings of the {{Internet Measurement Conference}} 2018}, author = {Khan, Mohammad Taha and DeBlasio, Joe and Voelker, Geoffrey M. and Snoeren, Alex C. 
and Kanich, Chris and {Vallina-Rodriguez}, Narseo}, year = {2018}, month = oct, pages = {443--456}, address = {{New York, NY, USA}}, abstract = {Global Internet users increasingly rely on virtual private network (VPN) services to preserve their privacy, circumvent censorship, and access geo-filtered content. Due to their own lack of technical sophistication and the opaque nature of VPN clients, however, the vast majority of users have limited means to verify a given VPN service's claims along any of these dimensions. We design an active measurement system to test various infrastructural and privacy aspects of VPN services and evaluate 62 commercial providers. Our results suggest that while commercial VPN services seem, on the whole, less likely to intercept or tamper with user traffic than other, previously studied forms of traffic proxying, many VPNs do leak user traffic---perhaps inadvertently---through a variety of means. We also find that a non-trivial fraction of VPN providers transparently proxy traffic, and many misrepresent the physical location of their vantage points: 5--30\% of the vantage points, associated with 10\% of the providers we study, appear to be hosted on servers located in countries other than those advertised to users.}, series = {{{IMC}} '18} } @article{kitchenhamProceduresPerformingSystematic, title = {Procedures for {{Performing Systematic Reviews}}}, author = {Kitchenham, Barbara}, pages = {33} } @inproceedings{kleinDNSCacheBasedUser2019, title = {{{DNS Cache}}-{{Based User Tracking}}}, booktitle = {Proceedings 2019 {{Network}} and {{Distributed System Security Symposium}}}, author = {Klein, Amit and Pinkas, Benny}, year = {2019}, abstract = {We describe a novel user tracking technique that is based on assigning statistically unique DNS records per user. 
This new tracking technique is unique in being able to distinguish between machines that have identical hardware and software, and track users even if they use ``privacy mode'' browsing, or use multiple browsers (on the same machine).} } @article{kontaxisTrackingProtectionFirefox, title = {Tracking {{Protection}} in {{Firefox For Privacy}} and {{Performance}}}, author = {Kontaxis, Georgios and Chew, Monica}, pages = {4}, abstract = {We present Tracking Protection in the Mozilla Firefox web browser. Tracking Protection is a new privacy technology to mitigate invasive tracking of users' online activity by blocking requests to tracking domains. We evaluate our approach and demonstrate a 67.5\% reduction in the number of HTTP cookies set during a crawl of the Alexa top 200 news sites. Since Firefox does not download and render content from tracking domains, Tracking Protection also enjoys performance benefits of a 44\% median reduction in page load time and 39\% reduction in data usage in the Alexa top 200 news sites.} } @article{krishnamurthyLeakagePersonallyIdentifiable2010, title = {On the {{Leakage}} of {{Personally Identifiable Information}} via {{Online Social Networks}}}, author = {Krishnamurthy, Balachander and Wills, Craig E.}, year = {2010}, month = jan, volume = {40}, pages = {112--117}, abstract = {For purposes of this paper, we define "Personally identifiable information" (PII) as information which can be used to distinguish or trace an individual's identity either alone or when combined with other information that is linkable to a specific individual. The popularity of Online Social Networks (OSN) has accelerated the appearance of vast amounts of personal information on the Internet. Our research shows that it is possible for third-parties to link PII, which is leaked via OSNs, with user actions both within OSN sites and elsewhere on non-OSN sites. We refer to this ability to link PII and combine it with other information as "leakage". 
We have identified multiple ways by which such leakage occurs and discuss measures to prevent it.}, journal = {ACM SIGCOMM Computer Communication Review}, number = {1} } @inproceedings{krishnamurthyPrivacyLeakageVs2011, title = {Privacy Leakage vs. {{Protection}} Measures: {{The}} Growing Disconnect}, shorttitle = {Privacy Leakage vs. {{Protection}} Measures}, author = {Krishnamurthy, Balachander and Naryshkin, Konstantin and Wills, Craig E.}, year = {2011}, abstract = {Numerous research papers have listed different vectors of personally identifiable information leaking via traditional and mobile Online Social Networks (OSNs) and highlighted the ongoing aggregation of data about users visiting popular Web sites. We argue that the landscape is worsening and existing proposals (including the recent U.S. Federal Trade Commission's report) do not address several key issues. We examined over 100 popular non-OSN Web sites across a number of categories where tens of millions of users representing diverse demographics have accounts, to see if these sites leak private information to prominent aggregators. Our results raise considerable concerns: we see leakage in sites for every category we examined; fully 56\% of the sites directly leak pieces of private information with this result growing to 75\% if we also include leakage of a site userid. Sensitive search strings sent to healthcare Web sites and travel itineraries on flight reservation sites are leaked in 9 of the top 10 sites studied for each category. The community needs a clear understanding of the shortcomings of existing privacy protection measures and the new proposals. The growing disconnect between the protection measures and increasing leakage and linkage suggests that we need to move beyond the losing battle with aggregators and examine what roles first-party sites can play in protecting privacy of their users.} } @inproceedings{krishnamurthyPrivacyLeakageVs2011a, title = {Privacy Leakage vs. 
Protection Measures: The Growing Disconnect}, shorttitle = {Privacy Leakage vs. Protection Measures}, booktitle = {{{Web}} 2.0 {{Workshop}} on {{Security}} and {{Privacy}}}, author = {Krishnamurthy, Balachander and Naryshkin, Konstantin and Wills, Craig E.}, year = {2011}, pages = {2--11}, abstract = {Numerous research papers have listed different vectors of personally identifiable information leaking via traditional and mobile Online Social Networks (OSNs) and highlighted the ongoing aggregation of data about users visiting popular Web sites. We argue that the landscape is worsening and existing proposals (including the recent U.S. Federal Trade Commission's report) do not address several key issues. We examined over 100 popular non-OSN Web sites across a number of categories where tens of millions of users representing diverse demographics have accounts, to see if these sites leak private information to prominent aggregators. Our results raise considerable concerns: we see leakage in sites for every category we examined; fully 56 \% of the sites directly leak pieces of private information with this result growing to 75 \% if we also include leakage of a site userid. Sensitive search strings sent to healthcare Web sites and travel itineraries on flight reservation sites are leaked in 9 of the top 10 sites studied for each category. The community needs a clear understanding of the shortcomings of existing privacy protection measures and the new proposals. The growing disconnect between the protection measures and increasing leakage and linkage suggests that we need to move beyond the losing battle with aggregators and examine what roles first-party sites can play in protecting privacy of their users.} } @techreport{kristolHTTPStateManagement1997, title = {{{HTTP State Management Mechanism}}}, author = {Kristol, D. 
and Montulli, L.}, year = {1997}, month = feb, pages = {1--21}, institution = {{RFC Editor}}, url = {https://www.rfc-editor.org/info/rfc2109}, urldate = {2020-02-11}, abstract = {This document specifies a way to create a stateful session with HTTP requests and responses. It describes two new headers, Cookie and Set-Cookie, which carry state information between participating origin servers and user agents. The method described here differs from Netscape's Cookie proposal, but it can interoperate with HTTP/1.0 user agents that use Netscape's method.}, note = {Accessed 2020-02-11}, number = {2109}, type = {{{RFC}}} } @techreport{kristolHTTPStateManagement2000, title = {{{HTTP State Management Mechanism}}}, author = {Kristol, D. and Montulli, L.}, year = {2000}, month = oct, pages = {1--26}, institution = {{RFC Editor}}, url = {https://www.rfc-editor.org/info/rfc2965}, urldate = {2020-02-11}, abstract = {This document specifies a way to create a stateful session with Hypertext Transfer Protocol (HTTP) requests and responses.}, note = {Accessed 2020-02-11}, number = {2965}, type = {{{RFC}}} } @article{kuhnPrivacyNotionsAnonymous2019, title = {On {{Privacy Notions}} in {{Anonymous Communication}}}, author = {Kuhn, Christiane and Beck, Martin and Schiffner, Stefan and Jorswieck, Eduard and Strufe, Thorsten}, year = {2019}, month = apr, volume = {2019}, pages = {105--125}, abstract = {Many anonymous communication networks (ACNs) with different privacy goals have been developed. Still, there are no accepted formal definitions of privacy goals, and ACNs often define their goals ad hoc. However, the formal definition of privacy goals benefits the understanding and comparison of different flavors of privacy and, as a result, the improvement of ACNs. In this paper, we work towards defining and comparing privacy goals by formalizing them as privacy notions and identifying their building blocks. 
For any pair of notions we prove whether one is strictly stronger, and, if so, which. Hence, we are able to present a complete hierarchy. Using this rigorous comparison between notions, we revise inconsistencies between the existing works and improve the understanding of privacy goals.}, journal = {Proceedings on Privacy Enhancing Technologies}, number = {2} } @inproceedings{leonWhyJohnnyCan2012, title = {Why {{Johnny}} Can't Opt out: A Usability Evaluation of Tools to Limit Online Behavioral Advertising}, shorttitle = {Why {{Johnny}} Can't Opt Out}, booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, author = {Leon, Pedro and Ur, Blase and Shay, Richard and Wang, Yang and Balebako, Rebecca and Cranor, Lorrie}, year = {2012}, month = may, pages = {589--598}, abstract = {We present results of a 45-participant laboratory study investigating the usability of nine tools to limit online behavioral advertising (OBA). We interviewed participants about OBA and recorded their behavior and attitudes as they configured and used a privacy tool, such as a browser plugin that blocks requests to specific URLs, a tool that sets browser cookies indicating a user's preference to opt out of OBA, or the privacy settings built into a web browser. We found serious usability flaws in all tools we tested. Participants found many tools difficult to configure, and tools' default settings were often minimally protective. Ineffective communication, confusing interfaces, and a lack of feedback led many participants to conclude that a tool was blocking OBA when they had not properly configured it to do so. Without being familiar with many advertising companies and tracking technologies, it was difficult for participants to use the tools effectively.}, series = {{{CHI}} '12} } @inproceedings{leungShouldYouUse2016, title = {Should {{You Use}} the {{App}} for {{That}}? 
{{Comparing}} the {{Privacy Implications}} of {{App}}- and {{Web}}-Based {{Online Services}}}, shorttitle = {Should {{You Use}} the {{App}} for {{That}}?}, booktitle = {Proceedings of the 2016 {{Internet Measurement Conference}}}, author = {Leung, Christophe and Ren, Jingjing and Choffnes, David and Wilson, Christo}, year = {2016}, month = nov, pages = {365--372}, abstract = {Many popular, free online services provide cross-platform interfaces via Web browsers as well as apps on iOS and Android. To monetize these services, many additionally include tracking and advertising libraries that gather information about users with significant privacy implications. Given that the Web-based and mobile-app-based ecosystems evolve independently, an important open question is how these platforms compare with respect to user privacy. In this paper, we conduct the first head-to-head study of 50 popular, free online services to understand which is better for privacy---Web or app? We conduct manual tests, extract personally identifiable information (PII) shared over plaintext and encrypted connections, and analyze the data to understand differences in user-data collection across platforms for the same service. 
While we find that all platforms expose users' data, there are still opportunities to significantly limit how much information is shared with other parties by selectively using the app or Web version of a service.}, series = {{{IMC}} '16} } @article{libertPrivacyImplicationsHealth2015, title = {Privacy {{Implications}} of {{Health Information Seeking}} on the {{Web}}}, author = {Libert, Timothy}, year = {2015}, month = feb, volume = {58}, pages = {68--77}, abstract = {A revealing picture of how personal health information searches become the property of private corporations.}, journal = {Communications of the ACM}, number = {3} } @inproceedings{liTrackAdvisorTakingBack2015, title = {{{TrackAdvisor}}: {{Taking Back Browsing Privacy}} from {{Third}}-{{Party Trackers}}}, shorttitle = {{{TrackAdvisor}}}, booktitle = {Passive and {{Active Measurement}}}, author = {Li, Tai-Ching and Hang, Huy and Faloutsos, Michalis and Efstathopoulos, Petros}, editor = {Mirkovic, Jelena and Liu, Yong}, year = {2015}, pages = {277--289}, abstract = {Even though most web users assume that only the websites that they visit directly become aware of the visit, this belief is incorrect. Many website display contents hosted externally by third-party websites, which can track users and become aware of their web-surfing behavior. This phenomenon is called third-party tracking, and although such activities violate no law, they raise privacy concerns because the tracking is carried out without users' knowledge or explicit approval. Our work provides a systematic study of the third-party tracking phenomenon. First, we develop TrackAdvisor, arguably the first method that utilizes Machine Learning to identify the HTTP requests carrying sensitive information to third-party trackers with very high accuracy (100 \% Recall and 99.4 Precision). Microsoft's Tracking Protection Lists, which is a widely-used third-party tracking blacklist achieves only a Recall of 72.2 \%. 
Second, we quantify the pervasiveness of the third-party tracking phenomenon: 46 \% of the home pages of the websites in Alexa Global Top 10,000 have at least one third-party tracker, and Google, using third-party tracking, monitors 25 \% of these popular websites. Our overarching goal is to measure accurately how widespread third-party tracking is and hopefully would raise the public awareness to its potential privacy risks.}, series = {Lecture {{Notes}} in {{Computer Science}}} } @inproceedings{liuAnalyzingFacebookPrivacy2011, title = {Analyzing Facebook Privacy Settings: User Expectations vs. Reality}, shorttitle = {Analyzing Facebook Privacy Settings}, booktitle = {Proceedings of the 2011 {{ACM SIGCOMM}} Conference on {{Internet}} Measurement Conference}, author = {Liu, Yabing and Gummadi, Krishna P. and Krishnamurthy, Balachander and Mislove, Alan}, year = {2011}, month = nov, pages = {61--70}, abstract = {The sharing of personal data has emerged as a popular activity over online social networking sites like Facebook. As a result, the issue of online social network privacy has received significant attention in both the research literature and the mainstream media. Our overarching goal is to improve defaults and provide better tools for managing privacy, but we are limited by the fact that the full extent of the privacy problem remains unknown; there is little quantification of the incidence of incorrect privacy settings or the difficulty users face when managing their privacy. In this paper, we focus on measuring the disparity between the desired and actual privacy settings, quantifying the magnitude of the problem of managing privacy. We deploy a survey, implemented as a Facebook application, to 200 Facebook users recruited via Amazon Mechanical Turk. We find that 36\% of content remains shared with the default privacy settings. 
We also find that, overall, privacy settings match users' expectations only 37\% of the time, and when incorrect, almost always expose content to more users than expected. Finally, we explore how our results have potential to assist users in selecting appropriate privacy settings by examining the user-created friend lists. We find that these have significant correlation with the social network, suggesting that information from the social network may be helpful in implementing new tools for managing privacy.}, series = {{{IMC}} '11} } @inproceedings{malandrinoPrivacyAwarenessInformation2013, title = {Privacy {{Awareness}} about {{Information Leakage}}: {{Who}} Knows What about Me?}, shorttitle = {Privacy Awareness about Information Leakage}, booktitle = {Proceedings of the 12th {{ACM}} Workshop on Privacy in the Electronic Society}, author = {Malandrino, Delfina and Petta, Andrea and Scarano, Vittorio and Serra, Luigi and Spinelli, Raffaele and Krishnamurthy, Balachander}, year = {2013}, month = nov, pages = {279--284}, abstract = {The task of protecting users' privacy is made more difficult by their attitudes towards information disclosure without full awareness and the economics of the tracking and advertising industry. Even after numerous press reports and widespread disclosure of leakages on the Web and on popular Online Social Networks, many users appear not be fully aware of the fact that their information may be collected, aggregated and linked with ambient information for a variety of purposes. Past attempts at alleviating this problem have addressed individual aspects of the user's data collection. In this paper we move towards a comprehensive and efficient client-side tool that maximizes users' awareness of the extent of their information leakage. 
We show that such a customizable tool can help users to make informed decisions on controlling their privacy footprint.}, series = {{{WPES}} '13} } @inproceedings{matthewsCanBrowserAddOns2018, title = {Can {{Browser Add}}-{{Ons Protect Your Children}} from {{Online Tracking}}?}, booktitle = {Proceedings of the 2018 {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Matthews, Zachary and Vlajic, Natalija}, year = {2018}, month = oct, pages = {2243--2245}, abstract = {Online tracking of children by third-parties is strictly regulated by law in many regions of the world (e.g., COPPA in USA and GDPR in EU), and in a large number of situations constitutes criminal activity. Unfortunately, the existence of these laws does not seem to be an effective deterrence. In this paper, we provide a brief summary of our findings pertaining to the effectiveness of four popular browser add-ons in protecting against third-party tracking on a select number of children-oriented Web-sites. The obtain results show that protection from tracking by a browser add-on is generally achieved at the expense of Web-page performance. In other words, add-ons that are effective at blocking third-party trackers will often adversely affect the normal functioning of the visited Web-page(s). In addition, our results also show that when it comes to user/children tracking by well-known 'tech giants', all four add-ons are likely to provide only limited protection.}, series = {{{CCS}} '18} } @inproceedings{mayerThirdPartyWebTracking2012, title = {Third-{{Party Web Tracking}}: {{Policy}} and {{Technology}}}, shorttitle = {Third-{{Party Web Tracking}}}, booktitle = {Proceedings of the 2012 {{IEEE Symposium}} on {{Security}} and {{Privacy}}}, author = {Mayer, Jonathan R. and Mitchell, John C.}, year = {2012}, pages = {413--427}, abstract = {In the early days of the web, content was designed and hosted by a single person, group, or organization. No longer. 
Webpages are increasingly composed of content from myriad unrelated "third-party" websites in the business of advertising, analytics, social networking, and more. Third-party services have tremendous value: they support free content and facilitate web innovation. But third-party services come at a privacy cost: researchers, civil society organizations, and policymakers have increasingly called attention to how third parties can track a user's browsing activities across websites. This paper surveys the current policy debate surrounding third-party web tracking and explains the relevant technology. It also presents the FourthParty web measurement platform and studies we have conducted with it. Our aim is to inform researchers with essential background and tools for contributing to public understanding and policy debates about web tracking.}, series = {{{SP}} '12} } @article{melicherNotTrackMe2016, title = {({{Do Not}}) {{Track Me Sometimes}}: {{Users}}' {{Contextual Preferences}} for {{Web Tracking}}}, shorttitle = {({{Do Not}}) {{Track Me Sometimes}}}, author = {Melicher, William and Sharif, Mahmood and Tan, Joshua and Bauer, Lujo and Christodorescu, Mihai and Leon, Pedro Giovanni}, year = {2016}, month = apr, volume = {2016}, pages = {135--154}, journal = {Proceedings on Privacy Enhancing Technologies}, language = {English}, number = {2} } @inproceedings{merzdovnikBlockMeIf2017, title = {Block {{Me If You Can}}: {{A Large}}-{{Scale Study}} of {{Tracker}}-{{Blocking Tools}}}, shorttitle = {Block {{Me If You Can}}}, booktitle = {2017 {{IEEE European Symposium}} on {{Security}} and {{Privacy}} ({{EuroS\&P}})}, author = {Merzdovnik, Georg and Huber, Markus and Buhov, Damjan and Nikiforakis, Nick and Neuner, Sebastian and Schmiedecker, Martin and Weippl, Edgar}, year = {2017}, month = apr, pages = {319--333}, abstract = {In this paper, we quantify the effectiveness of third-party tracker blockers on a large scale. 
First, we analyze the architecture of various state-of-the-art blocking solutions and discuss the advantages and disadvantages of each method. Second, we perform a two-part measurement study on the effectiveness of popular tracker-blocking tools. Our analysis quantifies the protection offered against trackers present on more than 100,000 popular websites and 10,000 popular Android applications. We provide novel insights into the ongoing arms race between trackers and developers of blocking tools as well as which tools achieve the best results under what circumstances. Among others, we discover that rule-based browser extensions outperform learning-based ones, trackers with smaller footprints are more successful at avoiding being blocked, and CDNs pose a major threat towards the future of tracker-blocking tools. Overall, the contributions of this paper advance the field of web privacy by providing not only the largest study to date on the effectiveness of tracker-blocking tools, but also by highlighting the most pressing challenges and privacy issues of third-party tracking.} } @misc{michaelStraceLinuxManual2020, title = {Strace(1) - {{Linux}} Manual Page}, author = {Kerrisk, Michael}, year = {2020}, month = feb, url = {http://www.man7.org/linux/man-pages/man1/strace.1.html}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @article{mugheesDetectingAdBlockersWild2017, title = {Detecting {{Anti Ad}}-{{Blockers}} in the {{Wild}}}, author = {Mughees, Muhammad Haris and Qian, Zhiyun and Shafiq, Zubair}, year = {2017}, month = jul, volume = {2017}, pages = {130--146}, journal = {PoPETs}, number = {3} } @misc{NetflixBeginsRollOut2010, title = {Netflix {{Begins Roll}}-{{Out}} of 2nd {{Generation Media Player}} for {{Instant Streaming}} on {{Windows PCs}} and {{Intel Macs}} - {{Oct}} 27, 2008}, year = {2010}, month = may, url = {https://web.archive.org/web/20100529122655/http://netflix.mediaroom.com/index.php?s=43\&item=288}, urldate = {2020-02-20}, note = {Accessed 
2020-02-20} } @article{oatesTurtlesLocksBathrooms2018, title = {Turtles, {{Locks}}, and {{Bathrooms}}: {{Understanding Mental Models}} of {{Privacy Through Illustration}}}, shorttitle = {Turtles, {{Locks}}, and {{Bathrooms}}}, author = {Oates, Maggie and Ahmadullah, Yama and Marsh, Abigail and Swoopes, Chelse and Zhang, Shikun and Balebako, Rebecca and Cranor, Lorrie Faith}, year = {2018}, month = oct, volume = {2018}, pages = {5--32}, journal = {PoPETs}, number = {4} } @inproceedings{olejnikSellingPrivacyAuction2014, title = {Selling off {{Privacy}} at {{Auction}}}, booktitle = {Proceedings 2014 {{Network}} and {{Distributed System Security Symposium}}}, author = {Olejnik, Lukasz and Tran, Minh-Dung and Castelluccia, Claude}, year = {2014}, month = feb, abstract = {Real-Time Bidding (RTB) and Cookie Matching (CM) are transforming the advertising landscape to an extremely dynamic market and make targeted advertising considerably permissive. The emergence of these technologies allows companies to exchange user data as a product and therefore raises important concerns from privacy perspectives. In this paper, we perform a privacy analysis of CM and RTB and quantify the leakage of users' browsing histories due to these mechanisms. We study this problem on a corpus of users' Web histories, and show that using these technologies, certain companies can significantly improve their tracking and profiling capabilities. We detect 41 companies serving ads via RTB and over 125 using Cookie Matching. We show that 91\% of users in our dataset were affected by CM and in certain cases, 27\% of users' Web browsing histories could be leaked to 3rd-party companies through RTB. We expose a design characteristic of RTB systems to observe the prices which advertisers pay for serving ads to Web users. We leverage this feature and provide important insights into these prices by analyzing different user profiles and visiting contexts. 
Our study shows the variation of prices according to context information including visiting site, time and user's physical location. We experimentally confirm that users with known Web browsing history are evaluated higher than new comers, that some user profiles are more valuable than others, and that users' intents, such as looking for a commercial product, are sold at higher prices than users' Web browsing histories. In addition, we show that there is a huge gap between users' perception of the value of their personal information and its actual value on the market. A recent study by Carrascal et al. showed that, on average, users evaluate the price of the disclosure of their presence on a Web site to EUR 7. We show that user's Web browsing history elements are routinely being sold off for less than \$0.0005.} } @inproceedings{papadopoulosCookieSynchronizationEverything2019, title = {Cookie {{Synchronization}}: {{Everything You Always Wanted}} to {{Know But Were Afraid}} to {{Ask}}}, shorttitle = {Cookie {{Synchronization}}}, booktitle = {The {{World Wide Web Conference}}}, author = {Papadopoulos, Panagiotis and Kourtellis, Nicolas and Markatos, Evangelos}, year = {2019}, month = may, pages = {1432--1442}, abstract = {User data is the primary input of digital advertising, fueling the free Internet as we know it. As a result, web companies invest a lot in elaborate tracking mechanisms to acquire user data that can sell to data markets and advertisers. However, with same-origin policy and cookies as a primary identification mechanism on the web, each tracker knows the same user with a different ID. To mitigate this, Cookie Synchronization (CSync) came to the rescue, facilitating an information sharing channel between 3rd-parties that may or not have direct access to the website the user visits. In the background, with CSync, they merge user data they own, but also reconstruct a user's browsing history, bypassing the same origin policy. 
In this paper, we perform a first to our knowledge in-depth study of CSync in the wild, using a year-long weblog from 850 real mobile users. Through our study, we aim to understand the characteristics of the CSync protocol and the impact it has on web users' privacy. For this, we design and implement CONRAD, a holistic mechanism to detect CSync events at real time, and the privacy loss on the user side, even when the synced IDs are obfuscated. Using CONRAD, we find that 97\% of the regular web users are exposed to CSync: most of them within the first week of their browsing, and the median userID gets leaked, on average, to 3.5 different domains. Finally, we see that CSync increases the number of domains that track the user by a factor of 6.75.}, series = {{{WWW}} '19} } @inproceedings{papadopoulosExclusiveHowSynced2018, title = {Exclusive: {{How}} the (Synced) {{Cookie Monster}} Breached My Encrypted {{VPN}} Session}, shorttitle = {Exclusive}, booktitle = {Proceedings of the 11th {{European Workshop}} on {{Systems Security}}}, author = {Papadopoulos, Panagiotis and Kourtellis, Nicolas and Markatos, Evangelos P.}, year = {2018}, month = apr, pages = {1--6}, abstract = {In recent years, and after the Snowden revelations, there has been a significant movement in the web from organizations, policymakers and individuals to enhance the privacy awareness among users. As a consequence, more and more publishers support TLS in their websites, and vendors provide privacy and anonymity tools, such as secure VPNs or Tor onions, to cover the need of users for privacy-preserving web browsing. But is the sporadic appliance of such tools enough to provide privacy? In this paper, we describe two privacy-breaching threats against users accessing the Internet over a secure VPN. The breaches are made possible through Cookie Synchronization, nowadays widely used by third parties for advertisement and tracking purposes. 
The generated privacy leaks can be used by a snooping entity such as an ISP, to re-identify a user in the web and reveal their browsing history even when users are hidden behind a VPN. By probing the top 12K Alexa sites, we find that 1 out of 13 websites expose their users to these privacy leaks.}, series = {{{EuroSec}}'18} } @inproceedings{papadopoulosKsubscriptionPrivacyPreservingMicroblogging2013, title = {K-Subscription: {{Privacy}}-{{Preserving Microblogging Browsing Through Obfuscation}}}, shorttitle = {K-Subscription}, booktitle = {Proceedings of the 29th {{Annual Computer Security Applications Conference}}}, author = {Papadopoulos, Panagiotis and Papadogiannakis, Antonis and Polychronakis, Michalis and Zarras, Apostolis and Holz, Thorsten and Markatos, Evangelos P.}, year = {2013}, month = dec, pages = {49--58}, abstract = {Over the past few years, microblogging social networking services have become a popular means for information sharing and communication. Besides sharing information among friends, such services are currently being used by artists, politicians, news channels, and information providers to easily communicate with their constituency. Even though following specific channels on a microblogging service enables users to receive interesting information in a timely manner, it may raise significant privacy concerns as well. For example, the microblogging service is able to observe all the channels that a particular user follows. This way, it can infer all the subjects a user might be interested in and generate a detailed profile of this user. This knowledge can be used for a variety of purposes that are usually beyond the control of the users. To address these privacy concerns, we propose k-subscription: an obfuscation-based approach that enables users to follow privacy-sensitive channels, while, at the same time, making it difficult for the microblogging service to find out their actual interests. 
Our method relies on obfuscation: in addition to each privacy-sensitive channel, users are encouraged to randomly follow k -- 1 other channels they are not interested in. In this way (i) their actual interests are hidden in random selections, and (ii) each user contributes in hiding the real interests of other users. Our analysis indicates that k-subscription makes it difficult for attackers to pinpoint a user's interests with significant confidence. We show that this confidence can be made predictably small by slightly adjusting k while adding a reasonably low overhead on the user's system.}, series = {{{ACSAC}} '13} } @misc{perryDesignImplementationTor2018, title = {The {{Design}} and {{Implementation}} of the {{Tor Browser}} [{{DRAFT}}]}, author = {Perry, Mike and Clark, Erinn and Murdoch, Steven and Koppen, Georg}, year = {2018}, month = jun, url = {https://2019.www.torproject.org/projects/torbrowser/design/}, urldate = {2020-07-15}, note = {Accessed 2020-07-15} } @misc{PersistenceServiceJNLPAPI2015, title = {{{PersistenceService}} ({{JNLP API Reference}} 1.7.0\_95)}, year = {2015}, url = {https://docs.oracle.com/javase/7/docs/jre/api/javaws/jnlp/javax/jnlp/PersistenceService.html}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @inproceedings{pujolAnnoyedUsersAds2015, title = {Annoyed {{Users}}: {{Ads}} and {{Ad}}-{{Block Usage}} in the {{Wild}}}, shorttitle = {Annoyed {{Users}}}, booktitle = {Proceedings of the 2015 {{Internet Measurement Conference}}}, author = {Pujol, Enric and Hohlfeld, Oliver and Feldmann, Anja}, year = {2015}, month = oct, pages = {93--106}, abstract = {Content and services which are offered for free on the Internet are primarily monetized through online advertisement. This business model relies on the implicit agreement between content providers and users where viewing ads is the price for the "free" content. 
This status quo is not acceptable to all users, however, as manifested by the rise of ad-blocking plugins which are available for all popular Web browsers. Indeed, ad-blockers have the potential to substantially disrupt the widely established business model of "free" content, currently one of the core elements on which the Web is built. In this work, we shed light on how users interact with ads. We show how to leverage the functionality of AdBlock Plus, one of the most popular ad-blockers to identify ad traffic from passive network measurements. We complement previous work, which focuses on active measurements, by characterizing ad-traffic in the wild, i.e., as seen in a residential broadband network of a major European ISP. Finally, we assess the prevalence of ad-blockers in this particular network and discuss possible implications for content providers and ISPs.}, series = {{{IMC}} '15} } @misc{RefererHeaderPrivacy, title = {Referer Header: Privacy and Security Concerns}, shorttitle = {Referer Header}, url = {https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns}, urldate = {2020-02-08}, abstract = {There are privacy and security risks associated with the Referer HTTP header. This article describes them, and offers advice on mitigating those risks.}, journal = {MDN Web Docs}, note = {Accessed 2020-02-08} } @inproceedings{reznichenkoAuctionsDonottrackCompliant2011, title = {Auctions in Do-Not-Track Compliant Internet Advertising}, booktitle = {Proceedings of the 18th {{ACM}} Conference on {{Computer}} and Communications Security}, author = {Reznichenko, Alexey and Guha, Saikat and Francis, Paul}, year = {2011}, month = oct, pages = {667--676}, abstract = {Online tracking of users in support of behavioral advertising is widespread. 
Several researchers have proposed non-tracking online advertising systems that go well beyond the requirements of the Do-Not-Track initiative launched by the US Federal Trade Commission (FTC). The primary goal of these systems is to allow for behaviorally targeted advertising without revealing user behavior (clickstreams) or user profiles to the ad network. Although these designs purport to be practical solutions, none of them adequately consider the role of the ad auctions, which today are central to the operation of online advertising systems. This paper looks at the problem of running auctions that leverage user profiles for ad ranking while keeping the user profile private. We define the problem, broadly explore the solution space, and discuss the pros and cons of these solutions. We analyze the performance of our solutions using data from Microsoft Bing advertising auctions. We conclude that, while none of our auctions are ideal in all respects, they are adequate and practical solutions.}, series = {{{CCS}} '11} } @inproceedings{roesnerDetectingDefendingThirdparty2012, title = {Detecting and Defending against Third-Party Tracking on the Web}, booktitle = {Proceedings of the 9th {{USENIX}} Conference on {{Networked Systems Design}} and {{Implementation}}}, author = {Roesner, Franziska and Kohno, Tadayoshi and Wetherall, David}, year = {2012}, month = apr, pages = {12}, abstract = {While third-party tracking on the web has garnered much attention, its workings remain poorly understood. Our goal is to dissect how mainstream web tracking occurs in the wild. We develop a client-side method for detecting and classifying five kinds of third-party trackers based on how they manipulate browser state. We run our detection system while browsing the web and observe a rich ecosystem, with over 500 unique trackers in our measurements alone. 
We find that most commercial pages are tracked by multiple parties, trackers vary widely in their coverage with a small number being widely deployed, and many trackers exhibit a combination of tracking behaviors. Based on web search traces taken from AOL data, we estimate that several trackers can each capture more than 20\% of a user's browsing behavior. We further assess the impact of defenses on tracking and find that no existing browser mechanisms prevent tracking by social media sites via widgets while still allowing those widgets to achieve their utility goals, which leads us to develop a new defense. To the best of our knowledge, our work is the most complete study of web tracking to date.}, series = {{{NSDI}}'12} } @inproceedings{ruohonenInvisiblePixelsAre2018, title = {Invisible {{Pixels Are Dead}}, {{Long Live Invisible Pixels}}!}, booktitle = {Proceedings of the 2018 {{Workshop}} on {{Privacy}} in the {{Electronic Society}}}, author = {Ruohonen, Jukka and Lepp{\"a}nen, Ville}, year = {2018}, pages = {28--32}, abstract = {Privacy has deteriorated in the world wide web ever since the 1990s. The tracking of browsing habits by different third-parties has been at the center of this deterioration. Web cookies and so-called web beacons have been the classical ways to implement third-party tracking. Due to the introduction of more sophisticated technical tracking solutions and other fundamental transformations, the use of classical image-based web beacons might be expected to have lost their appeal. According to a sample of over thirty thousand images collected from popular websites, this paper shows that such an assumption is a fallacy: classical 1 x 1 images are still commonly used for third-party tracking in the contemporary world wide web. 
While it seems that ad-blockers are unable to fully block these classical image-based tracking beacons, the paper further demonstrates that even limited information can be used to accurately classify the third-party 1 x 1 images from other images. An average classification accuracy of 0.956 is reached in the empirical experiment. With these results the paper contributes to the ongoing attempts to better understand the lack of privacy in the world wide web, and the means by which the situation might be eventually improved.}, series = {{{WPES}}'18} } @inproceedings{russoTrackingInformationFlow2009, title = {Tracking {{Information Flow}} in {{Dynamic Tree Structures}}}, booktitle = {Computer {{Security}} \textendash{} {{ESORICS}} 2009}, author = {Russo, Alejandro and Sabelfeld, Andrei and Chudnov, Andrey}, editor = {Backes, Michael and Ning, Peng}, year = {2009}, pages = {86--103}, abstract = {This paper explores the problem of tracking information flow in dynamic tree structures. Motivated by the problem of manipulating the Document Object Model (DOM) trees by browser-run client-side scripts, we address the dynamic nature of interactions via tree structures. We present a runtime enforcement mechanism that monitors this interaction and prevents a range of attacks, some of them missed by previous approaches, that exploit the tree structure in order to transfer sensitive information. 
We formalize our approach for a simple language with DOM-like tree operations and show that the monitor prevents scripts from disclosing secrets.}, annote = {M\"oglicherweise interessant f\"ur window.name Property und allgemeines tracking mit dem Document Object Model}, series = {Lecture {{Notes}} in {{Computer Science}}} } @article{samarasingheGlobalPerspectiveWeb2019, title = {Towards a Global Perspective on Web Tracking}, author = {Samarasinghe, Nayanamana and Mannan, Mohammad}, year = {2019}, month = nov, volume = {87}, pages = {101569}, abstract = {Several past measurement studies uncovered various aspects of web-based tracking and its serious impact on user privacy. Most studies used institutional resources, e.g., computers hosted at well-known universities, or cloud-computing infrastructures such as Amazon EC2, confining the study to a particular geolocation or a few locations. Would there be any difference if web tracking is measured from actual user-owned residential machines? Does a user's geolocation affect web tracking? Past studies do not adequately answer these important questions, although web users come from across the globe, and tracking primarily targets home users. As a step forward, we leverage the Luminati proxy service to run a measurement study using residential machines from 56 countries. We rely on the OpenWPM web privacy measurement framework to analyze third-party scripts and cookies in 2050 distinct URLs (Alexa Top-1000 home pages and Alexa Top-50 country-specific home pages for all 56 countries, and shared URLs via Twitter from Alexa Top-1000 domains for 10 countries). Our findings reveal that the prevalence of web tracking varies across the globe. In addition to location, tracking also seems to depend on factors such as data privacy policies, Internet speed and censorship. 
We also observe that despite legal efforts for strengthening privacy, such as the EU cookie law, violations are common and very blatant in some cases, highlighting the need for more effective tools and frameworks for compliance monitoring and enforcement.}, journal = {Computers \& Security} } @inproceedings{sanchez-rolaBakingTimerPrivacyAnalysis2019, title = {{{BakingTimer}}: Privacy Analysis of Server-Side Request Processing Time}, shorttitle = {{{BakingTimer}}}, booktitle = {Proceedings of the 35th {{Annual Computer Security Applications Conference}}}, author = {{Sanchez-Rola}, Iskander and Balzarotti, Davide and Santos, Igor}, year = {2019}, month = dec, pages = {478--488}, abstract = {Cookies were originally introduced as a way to provide state awareness to websites, and are now one of the backbones of the current web. However, their use is not limited to store the login information or to save the current state of user browsing. In several cases, third-party cookies are deliberately used for web tracking, user analytics, and for online advertisement, with the subsequent privacy loss for the end users. However, cookies are not the only technique capable of retrieving the users' browsing history. In fact, history sniffing techniques are capable of tracking the users' browsing history without relying on any specific code in a third-party website, but only on code executed within the visited site. Many sniffing techniques have been proposed to date, but they usually have several limitations and they are not able to differentiate between multiple possible states within the target application. In this paper we propose BakingTimer, a new history sniffing technique based on timing the execution of server-side request processing code. This method is capable of retrieving partial or complete user browsing history, it does not require any permission, and it can be performed through both first and third-party scripts. 
We studied the impact of our timing side-channel attack to detect prior visits to websites, and discovered that it was capable of detecting the users state in more than half of the 10K websites analyzed, which is the largest test performed to date to test this type of techniques. We additionally performed a manual analysis to check the capabilities of the attack to differentiate between three states: never accessed, accessed and logged in. Moreover, we performed a set of stability tests, to verify that our time measurements are robust with respect to changes both in the network RTT and in the servers workload.}, annote = {Timing attack using processing time on the server side}, series = {{{ACSAC}} '19} } @inproceedings{sanchez-rolaCanOptOut2019a, title = {Can {{I Opt Out Yet}}? {{GDPR}} and the {{Global Illusion}} of {{Cookie Control}}}, shorttitle = {Can {{I Opt Out Yet}}?}, booktitle = {Proceedings of the 2019 {{ACM Asia Conference}} on {{Computer}} and {{Communications Security}}}, author = {{Sanchez-Rola}, Iskander and Dell'Amico, Matteo and Kotzias, Platon and Balzarotti, Davide and Bilge, Leyla and Vervier, Pierre-Antoine and Santos, Igor}, year = {2019}, month = jul, pages = {340--351}, abstract = {The European Union's (EU) General Data Protection Regulation (GDPR), in effect since May 2018, enforces strict limitations on handling users' personal data, hence impacting their activity tracking on the Web. In this study, we perform an evaluation of the tracking performed in 2,000 high-traffic websites, hosted both inside and outside of the EU. We evaluate both the information presented to users and the actual tracking implemented through cookies; we find that the GDPR has impacted website behavior in a truly global way, both directly and indirectly: USA-based websites behave similarly to EU-based ones, while third-party opt-out services reduce the amount of tracking even for websites which do not put any effort in respecting the new law. 
On the other hand, we find that tracking remains ubiquitous. In particular, we found cookies that can identify users when visiting more than 90\% of the websites in our dataset - and we also encountered a large number of websites that present deceiving information, making it very difficult, if at all possible, for users to avoid being tracked.}, series = {Asia {{CCS}} '19} } @article{sanchez-rolaWebWatchingYou2017, title = {The Web Is Watching You: {{A}} Comprehensive Review of Web-Tracking Techniques and Countermeasures}, shorttitle = {The Web Is Watching You}, author = {{Sanchez-Rola}, Iskander and {Ugarte-Pedrero}, Xabier and Santos, Igor and Bringas, Pablo G.}, year = {2017}, month = feb, volume = {25}, pages = {18--29}, abstract = {Web tracking is a commonly-used practice on the Internet devoted to retrieve user information for activities such as personalization or advertisement. These tec}, journal = {Logic Journal of the {{IGPL}}}, number = {1} } @article{schelterUbiquityWebTracking2016, title = {On the {{Ubiquity}} of {{Web Tracking}}: {{Insights}} from a {{Billion}}-{{Page Web Crawl}}}, shorttitle = {On the {{Ubiquity}} of {{Web Tracking}}}, author = {Schelter, Sebastian and Kunegis, J{\'e}r{\^o}me}, year = {2016}, month = jul, url = {http://arxiv.org/abs/1607.07403}, urldate = {2019-08-14}, abstract = {We perform a large-scale analysis of third-party trackers on the World Wide Web. We extract third-party embeddings from more than 3.5 billion web pages of the CommonCrawl 2012 corpus, and aggregate those to a dataset containing more than 140 million third-party embeddings in over 41 million domains. 
To the best of our knowledge, this constitutes the largest empirical web tracking dataset collected so far, and exceeds related studies by more than an order of magnitude in the number of domains and web pages analyzed.} } @misc{SilverlightEndSupport2015, title = {Silverlight {{End}} of {{Support}}}, year = {2015}, month = jul, url = {https://support.microsoft.com/en-us/help/4511036/silverlight-end-of-support}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @inproceedings{sirurAreWeThere2018, title = {Are {{We There Yet}}?: {{Understanding}} the {{Challenges Faced}} in {{Complying}} with the {{General Data Protection Regulation}} ({{GDPR}})}, shorttitle = {Are {{We There Yet}}?}, booktitle = {Proceedings of the {{2nd International Workshop}} on {{Multimedia Privacy}} and {{Security}}}, author = {Sirur, Sean and Nurse, Jason R. C. and Webb, Helena}, year = {2018}, pages = {88--95}, abstract = {The EU General Data Protection Regulation (GDPR), enforced from 25\textsuperscript{th} May 2018, aims to reform how organisations view and control the personal data of private EU citizens. The scope of GDPR is somewhat unprecedented: it regulates every aspect of personal data handling, includes hefty potential penalties for non-compliance, and can prosecute any company in the world that processes EU citizens' data. In this paper, we look behind the scenes to investigate the real challenges faced by organisations in engaging with the GDPR. This considers issues in working with the regulation, the implementation process, and how compliance is verified. Our research approach relies on literature but, more importantly, draws on detailed interviews with several organisations. Key findings include the fact that large organisations generally found GDPR compliance to be reasonable and doable. The same was found for small-to-medium organisations (SMEs/SMBs) that were highly security-oriented. 
SMEs with less focus on data protection struggled to make what they felt was a satisfactory attempt at compliance. The main issues faced in their compliance attempts emerged from: the sheer breadth of the regulation; questions around how to enact the qualitative recommendations of the regulation; and the need to map out the entirety of their complex data networks.}, series = {{{MPS}} '18} } @article{solomosClashTrackersMeasuring2019, title = {Clash of the {{Trackers}}: {{Measuring}} the {{Evolution}} of the {{Online Tracking Ecosystem}}}, shorttitle = {Clash of the {{Trackers}}}, author = {Solomos, Konstantinos and Ilia, Panagiotis and Ioannidis, Sotiris and Kourtellis, Nicolas}, year = {2019}, month = jul, url = {http://arxiv.org/abs/1907.12860}, urldate = {2019-08-14}, abstract = {Websites are constantly adapting the methods used, and intensity with which they track online visitors. However, the wide-range enforcement of GDPR since one year ago (May 2018) forced websites serving EU-based online visitors to eliminate or at least reduce such tracking activity, given they receive proper user consent. Therefore, it is important to record and analyze the evolution of this tracking activity and assess the overall ``privacy health'' of the Web ecosystem and if it is better after GDPR enforcement. This work makes a significant step towards this direction. In this paper, we analyze the online ecosystem of 3rd-parties embedded in top websites which amass the majority of online tracking through 6 time snapshots taken every few months apart, in the duration of the last 2 years. We perform this analysis in three ways: 1) by looking into the network activity that 3rd-parties impose on each publisher hosting them, 2) by constructing a bipartite graph of ``publisher-to-tracker'', connecting 3rd parties with their publishers, 3) by constructing a ``tracker-to-tracker'' graph connecting 3rd-parties who are commonly found in publishers. 
We record significant changes through time in number of trackers, traffic induced in publishers (incoming vs. outgoing), embeddedness of trackers in publishers, popularity and mixture of trackers across publishers. We also report how such measures compare with the ranking of publishers based on Alexa. On the last level of our analysis, we dig deeper and look into the connectivity of trackers with each other and how this relates to potential cookie synchronization activity.} } @techreport{soltaniFlashCookiesPrivacy2009, title = {Flash {{Cookies}} and {{Privacy}}}, author = {Soltani, Ashkan and Canty, Shannon and Mayo, Quentin and Thomas, Lauren and Hoofnagle, Chris Jay}, year = {2009}, month = aug, institution = {{Social Science Research Network}}, url = {https://papers.ssrn.com/abstract=1446862}, urldate = {2020-02-13}, abstract = {This is a pilot study of the use of 'Flash cookies' by popular websites. We find that more than 50\% of the sites in our sample are using flash cookies to store information about the user. Some are using it to 'respawn' or re-instantiate HTTP cookies deleted by the user. Flash cookies often share the same values as HTTP cookies, and are even used on government websites to assign unique values to users. Privacy policies rarely disclose the presence of Flash cookies, and user controls for effectuating privacy preferences are lacking.}, note = {Accessed 2020-02-13}, number = {ID 1446862}, type = {{{SSRN Scholarly Paper}}} } @article{someControlWhatYou2017, title = {Control {{What You Include}}! {{Server}}-{{Side Protection}} against {{Third Party Web Tracking}}}, author = {Som{\'e}, Doli{\`e}re Francis and Bielova, Nataliia and Rezk, Tamara}, year = {2017}, month = mar, url = {http://arxiv.org/abs/1703.07578}, urldate = {2019-08-14}, abstract = {Third party tracking is the practice by which third parties recognize users across different websites as they browse the web. 
Recent studies show that 90\% of websites contain third party content that is tracking its users across the web. Website developers often need to include third party content in order to provide basic functionality. However, when a developer includes a third party content, she cannot know whether the third party contains tracking mechanisms. If a website developer wants to protect her users from being tracked, the only solution is to exclude any third-party content, thus trading functionality for privacy.} } @misc{soudersAnnouncingHTTPArchive2011, title = {Announcing the {{HTTP Archive}} | {{High Performance Web Sites}}}, author = {Souders, Steve}, year = {2011}, month = mar, url = {https://www.stevesouders.com/blog/2011/03/30/announcing-the-http-archive/}, urldate = {2020-03-22}, journal = {stevesouders.com}, note = {Accessed 2020-03-22}, type = {Blog} } @article{starovAreYouSure2016, title = {Are {{You Sure You Want}} to {{Contact Us}}? {{Quantifying}} the {{Leakage}} of {{PII}} via {{Website Contact Forms}}}, shorttitle = {Are {{You Sure You Want}} to {{Contact Us}}?}, author = {Starov, Oleksii and Gill, Phillipa and Nikiforakis, Nick}, year = {2016}, month = jan, volume = {2016}, pages = {20--33}, journal = {PoPETs}, number = {1} } @misc{StartpageCom, title = {Startpage.Com}, url = {https://www.startpage.com}, urldate = {2020-07-10}, abstract = {Startpage.com delivers online tools that help you to stay in control of your personal information and protect your online privacy.}, journal = {www.startpage.com}, note = {Accessed 2020-07-10} } @misc{statcounterSearchEngineMarket, title = {Search {{Engine Market Share Worldwide}}}, author = {StatCounter}, url = {https://gs.statcounter.com/search-engine-market-share}, urldate = {2020-07-10}, abstract = {This graph shows the market share of search engines worldwide based on over 10 billion monthly page views.}, journal = {StatCounter Global Stats}, note = {Accessed 2020-07-10} } @article{syQUICLookWeb2019, title = {A 
{{QUIC Look}} at {{Web Tracking}}}, author = {Sy, Erik and Burkert, Christian and Federrath, Hannes and Fischer, Mathias}, year = {2019}, month = jul, volume = {2019}, pages = {255--266}, journal = {PoPETs}, number = {3} } @inproceedings{syTrackingUsersWeb2018, title = {Tracking {{Users}} across the {{Web}} via {{TLS Session Resumption}}}, booktitle = {Proceedings of the 34th {{Annual Computer Security Applications Conference}}}, author = {Sy, Erik and Burkert, Christian and Federrath, Hannes and Fischer, Mathias}, year = {2018}, month = dec, pages = {289--299}, abstract = {User tracking on the Internet can come in various forms, e.g., via cookies or by fingerprinting web browsers. A technique that got less attention so far is user tracking based on TLS and specifically based on the TLS session resumption mechanism. To the best of our knowledge, we are the first that investigate the applicability of TLS session resumption for user tracking. For that, we evaluated the configuration of 48 popular browsers and one million of the most popular websites. Moreover, we present a so-called prolongation attack, which allows extending the tracking period beyond the lifetime of the session resumption mechanism. To show that under the observed browser configurations tracking via TLS session resumptions is feasible, we also looked into DNS data to understand the longest consecutive tracking period for a user by a particular website. Our results indicate that with the standard setting of the session resumption lifetime in many current browsers, the average user can be tracked for up to eight days. 
With a session resumption lifetime of seven days, as recommended upper limit in the draft for TLS version 1.3, 65\% of all users in our dataset can be tracked permanently.}, series = {{{ACSAC}} '18} } @inproceedings{tanielianSiameseCookieEmbedding2018, title = {Siamese {{Cookie Embedding Networks}} for {{Cross}}-{{Device User Matching}}}, booktitle = {Companion {{Proceedings}} of the {{The Web Conference}} 2018}, author = {Tanielian, Ugo and Tousch, Anne-Marie and Vasile, Flavian}, year = {2018}, month = apr, pages = {85--86}, abstract = {Over the last decade, the number of devices per person has increased substantially. This poses a challenge for cookie-based personalization applications, such as online search and advertising, as it narrows the personalization signal to a single device environment. A key task is to find which cookies belong to the same person to recover a complete cross-device user journey. Recent work on the topic has shown the benefits of using unsupervised embeddings learned on user event sequences. In this paper, we extend this approach to a supervised setting and introduce the Siamese Cookie Embedding Network (SCEmNet), a siamese convolutional architecture that leverages the multi-modal aspect of sequences, and show significant improvement over the state-of-the-art.}, annote = {Verwendbar f\"ur Future Tracking Ecosystem Developments}, series = {{{WWW}} '18} } @misc{tauberAsciimooSearx2020, title = {Asciimoo/Searx}, author = {Tauber, Adam}, year = {2020}, month = jul, url = {https://github.com/asciimoo/searx}, urldate = {2020-07-10}, abstract = {Privacy-respecting metasearch engine. 
Contribute to asciimoo/searx development by creating an account on GitHub.}, copyright = {AGPL-3.0 License , AGPL-3.0 License}, note = {Accessed 2020-07-10} } @article{trevisanYearsEUCookie2019, title = {4 {{Years}} of {{EU Cookie Law}}: {{Results}} and {{Lessons Learned}}}, shorttitle = {4 {{Years}} of {{EU Cookie Law}}}, author = {Trevisan, Martino and Traverso, Stefano and Bassi, Eleonora and Mellia, Marco}, year = {2019}, month = apr, volume = {2019}, pages = {126--145}, journal = {Proceedings on Privacy Enhancing Technologies}, number = {2} } @article{urbanUnwantedSharingEconomy2018, title = {The {{Unwanted Sharing Economy}}: {{An Analysis}} of {{Cookie Syncing}} and {{User Transparency}} under {{GDPR}}}, shorttitle = {The {{Unwanted Sharing Economy}}}, author = {Urban, Tobias and Tatang, Dennis and Degeling, Martin and Holz, Thorsten and Pohlmann, Norbert}, year = {2018}, month = nov, url = {http://arxiv.org/abs/1811.08660}, urldate = {2019-08-14}, abstract = {The European General Data Protection Regulation (GDPR), which went into effect in May 2018, leads to important changes in this area: companies are now required to ask for users' consent before collecting and sharing personal data and by law users now have the right to gain access to the personal information collected about them. In this paper, we study and evaluate the effect of the GDPR on the online advertising ecosystem. In a first step, we measure the impact of the legislation on the connections (regarding cookie syncing) between third-parties and show that the general structure how the entities are arranged is not affected by the GDPR. However, we find that the new regulation has a statistically significant impact on the number of connections, which shrinks by around 40\%. 
Furthermore, we analyze the right to data portability by evaluating the subject access right process of popular companies in this ecosystem and observe differences between the processes implemented by the companies and how they interpret the new legislation. We exercised our right of access under GDPR with 36 companies that had tracked us online. Although 32 companies (89\%) we inquired replied within the period defined by law, only 21 (58\%) finished the process by the deadline set in the GDPR. Our work has implications regarding the implementation of privacy law as well as what online tracking companies should do to be more compliant with the new regulation.}, language = {English} } @inproceedings{vangoethemClockStillTicking2015, title = {The {{Clock}} Is {{Still Ticking}}: {{Timing Attacks}} in the {{Modern Web}}}, shorttitle = {The {{Clock}} Is {{Still Ticking}}}, booktitle = {Proceedings of the 22nd {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Van Goethem, Tom and Joosen, Wouter and Nikiforakis, Nick}, year = {2015}, month = oct, pages = {1382--1393}, abstract = {Web-based timing attacks have been known for over a decade, and it has been shown that, under optimal network conditions, an adversary can use such an attack to obtain information on the state of a user in a cross-origin website. In recent years, desktop computers have given way to laptops and mobile devices, which are mostly connected over a wireless or mobile network. These connections often do not meet the optimal conditions that are required to reliably perform cross-site timing attacks. In this paper, we show that modern browsers expose new side-channels that can be used to acquire accurate timing measurements, regardless of network conditions. 
Using several real-world examples, we introduce four novel web-based timing attacks against modern browsers and describe how an attacker can use them to obtain personal information based on a user's state on a cross-origin website. We evaluate our proposed attacks and demonstrate that they significantly outperform current attacks in terms of speed, reliability, and accuracy. Furthermore, we show that the nature of our attacks renders traditional defenses, i.e., those based on randomly delaying responses, moot and discuss possible server-side defense mechanisms.}, annote = {CSS:visited property for link differentiation \par Timing attacks over wireless networks}, series = {{{CCS}} '15} } @article{venkatadriInvestigatingSourcesPII2019, title = {Investigating Sources of {{PII}} Used in {{Facebook}}'s Targeted Advertising}, author = {Venkatadri, Giridhari and Lucherini, Elena and Sapiezynski, Piotr and Mislove, Alan}, year = {2019}, month = jan, volume = {2019}, pages = {227--244}, journal = {PoPETs}, number = {1} } @inproceedings{vlajicAnonymityTORUsers2017, title = {Anonymity of {{TOR Users Demystified}}}, booktitle = {2017 {{International Conference}} on {{Computational Science}} and {{Computational Intelligence}} ({{CSCI}})}, author = {Vlajic, Natalija and Madani, Pooria and Nguyen, Ethan}, year = {2017}, month = dec, pages = {109--114}, abstract = {The Onion Routing (TOR) system is often seen as the best anonymity tool out there, and is used by nearly 2.5 million people daily. Many of these users tend to believe that TOR offers more privacy protection than what it is actually intended or able to provide. In this paper, we specifically focus on the TOR browser - one of the two key components of the TOR system. In particular, we demonstrate that, if used in its default settings, the TOR browser provides little if any protection against four most common forms of user tracking. 
Hence, to achieve true online anonymity, extra efforts and vigilance need to be exercised on the part of the TOR user.} } @misc{w3cDocumentObjectModel1998, title = {Document {{Object Model}} ({{DOM}}) {{Level}} 1 {{Specification}}}, author = {W3C}, year = {1998}, month = oct, url = {https://www.w3.org/TR/REC-DOM-Level-1/}, urldate = {2020-02-09}, abstract = {This specification defines the Document Object Model Level 1, a platform- and language-neutral interface that allows programs and scripts to dynamically access and update the content, structure and style of documents. The Document Object Model provides a standard set of objects for representing HTML and XML documents, a standard model of how these objects can be combined, and a standard interface for accessing and manipulating them. Vendors can support the DOM as an interface to their proprietary data structures and APIs, and content authors can write to the standard DOM interfaces rather than product-specific APIs, thus increasing interoperability on the Web. The goal of the DOM specification is to define a programmatic interface for XML and HTML. The DOM Level 1 specification is separated into two parts: Core and HTML. The Core DOM Level 1 section provides a low-level set of fundamental interfaces that can represent any structured document, as well as defining extended interfaces for representing an XML document. These extended XML interfaces need not be implemented by a DOM implementation that only provides access to HTML documents; all of the fundamental interfaces in the Core section must be implemented. A compliant DOM implementation that implements the extended XML interfaces is required to also implement the fundamental Core interfaces, but not the HTML interfaces. The HTML Level 1 section provides additional, higher-level interfaces that are used with the fundamental interfaces defined in the Core Level 1 section to provide a more convenient view of an HTML document. 
A compliant implementation of the HTML DOM implements all of the fundamental Core interfaces as well as the HTML interfaces.}, note = {Accessed 2020-02-09} } @misc{w3cTrackingPreferenceExpression2019, title = {Tracking {{Preference Expression}} ({{DNT}})}, author = {W3C}, year = {2019}, month = jan, url = {https://www.w3.org/TR/tracking-dnt/}, urldate = {2020-07-09}, note = {Accessed 2020-07-09} } @misc{w3techsHistoricalYearlyTrends2020, title = {Historical Yearly Trends in the Usage Statistics of Client-Side Programming Languages for Websites}, author = {W3Techs}, year = {2020}, month = feb, url = {https://w3techs.com/technologies/history_overview/client_side_language/all/y}, urldate = {2020-02-17}, note = {Accessed 2020-02-17} } @misc{w3techsUsageStatisticsSilverlight2020, title = {Usage {{Statistics}} of {{Silverlight}} as {{Client}}-Side {{Programming Language}} on {{Websites}}, {{February}} 2020}, author = {W3Techs}, year = {2020}, month = feb, url = {https://w3techs.com/technologies/details/cp-silverlight}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @inproceedings{wachsPushAwayYour2018, title = {Push {{Away Your Privacy}}: {{Precise User Tracking Based}} on {{TLS Client Certificate Authentication}}}, shorttitle = {Push {{Away Your Privacy}}}, booktitle = {Proceedings of the {{Applied Networking Research Workshop}}}, author = {Wachs, Matthias and Scheitle, Quirin and Carle, Georg}, year = {2018}, month = jul, pages = {3}, abstract = {While the Transport Layer Security (TLS) protocol is typically used to authenticate servers, it also offers the possibility to use Client Certificates for to authenticate clients (CCA). We investigate the use of CCA based on two specific concerns: First, CCA is prone to being used in a context that encodes personal data into client certificates, such as identifying persons, e.g. in voting systems or VPN applications. 
Second, in versions prior to TLS1.3, the client certificate (as well as the server certificate) is being sent in clear text, permitting systematic and large-scale eavesdropping. Based on these two concerns, we investigate the use of CCA at an ISP uplink. Besides confirming our two concerns by finding, e.g., person names in VPN certificates, we also identify the Apple Push Notification Service (APNs) to leverage TLS CCA to identify client devices. We consider this use highly critical as APNs is an integral part of all Apple operating systems, and APNs establishes a connection immediately upon connecting the device to a network. We show that these properties can be used by various attacker types to track devices (and hence, likely users) with great precision across the global Internet. This work was published in 2017, with the TLS1.3 standardization still ongoing, and we aimed to emphasize the necessity of encrypting client certificates in the TLS handshake, which was adopted in the TLS1.3 standard. Based on work published at TMA'17 [1]. [1] Matthias Wachs, Quirin Scheitle, Georg Carle. 2017. Push Away Your Privacy: Precise User Tracking Based on TLS Client Certificate Authentication. In Proceedings of the 2017 Network Traffic Measurement and Analysis Conference (TMA '17)}, series = {{{ANRW}} '18} } @article{westMeasuringPrivacyDisclosures2014, title = {Measuring {{Privacy Disclosures}} in {{URL Query Strings}}}, author = {West, Andrew G. and Aviv, Adam J.}, year = {2014}, month = nov, volume = {18}, pages = {52--59}, abstract = {Publicly posted URLs sometimes contain a wealth of information about the identities and activities of the users who share them. URLs often utilize query strings \textendash{} that is, key-value pairs appended to the URL path \textendash{} to pass session parameters and form data. 
Although often benign and necessary to render the Web page, query strings sometimes contain tracking mechanisms, usernames, email addresses, and other information that users might not wish to publicly reveal. In isolation, this isn't particularly problematic, but the growth of Web 2.0 platforms such as social networks and microblogging means URLs, which are often copied and pasted from Web browsers, are increasingly publicly broadcast. To study URL sharing's privacy ramifications, the authors ran a measurement study that looked at 892 million user-submitted URLs, many disseminated in semipublic forums. That corpus contained a trove of personal information, including 1.7 million email addresses. In the most egregious examples, query strings contain plaintext usernames and passwords for administrative and sensitive accounts. The authors identify data leakage via both key-driven and value-driven analysis using manual inspections and automatic detection logic. Additionally, they analyze the click-through rates of sensitive URLs, examine geographical and mobile behavior patterns, and measure the broader statistical properties of key-value pairs. 
Finally, they propose a CleanURL service that can "scrub"' URLs of privacy-violating content.}, journal = {IEEE Internet Computing}, number = {6} } @misc{whatwgDOMLivingStandard2020, title = {{{DOM Living Standard}}}, author = {WHATWG}, year = {2020}, month = feb, url = {https://dom.spec.whatwg.org/}, urldate = {2020-02-09}, note = {Accessed 2020-02-09} } @misc{whatwgFormsHTMLStandard2020, title = {Forms - {{HTML Standard}}}, author = {WHATWG}, year = {2020}, month = aug, url = {https://html.spec.whatwg.org/multipage/forms.html}, urldate = {2020-08-10}, note = {Accessed 2020-08-10} } @misc{whatwgHiddenStateHTML2020, title = {Hidden {{State}} - {{HTML Standard}}}, author = {WHATWG}, year = {2020}, month = aug, url = {https://html.spec.whatwg.org/multipage/input.html\#hidden-state-(type\%3Dhidden)}, urldate = {2020-08-10}, note = {Accessed 2020-08-10} } @misc{whatwgHTMLStandard2020, title = {{{HTML Standard}}}, author = {WHATWG}, year = {2020}, month = feb, url = {https://html.spec.whatwg.org/}, urldate = {2020-02-20}, note = {Accessed 2020-02-20} } @misc{whatwgHTMLStandard2020a, title = {{{HTML Standard}}}, author = {WHATWG}, year = {2020}, month = feb, url = {https://html.spec.whatwg.org/\#disk-space-2}, urldate = {2020-02-27}, note = {Accessed 2020-02-27} } @misc{whatwgWindowNameHTML2020, title = {Window.Name - {{HTML Standard}}}, author = {WHATWG}, year = {2020}, month = aug, url = {https://html.spec.whatwg.org/multipage/window-object.html\#dom-name}, urldate = {2020-08-10}, note = {Accessed 2020-08-07} } @inproceedings{wondracekPracticalAttackDeanonymize2010, title = {A {{Practical Attack}} to {{De}}-Anonymize {{Social Network Users}}}, booktitle = {2010 {{IEEE Symposium}} on {{Security}} and {{Privacy}}}, author = {Wondracek, Gilbert and Holz, Thorsten and Kirda, Engin and Kruegel, Christopher}, year = {2010}, month = may, pages = {223--238}, abstract = {Social networking sites such as Facebook, LinkedIn, and Xing have been reporting exponential growth rates 
and have millions of registered users. In this paper, we introduce a novel de-anonymization attack that exploits group membership information that is available on social networking sites. More precisely, we show that information about the group memberships of a user (i.e., the groups of a social network to which a user belongs) is sufficient to uniquely identify this person, or, at least, to significantly reduce the set of possible candidates. That is, rather than tracking a user's browser as with cookies, it is possible to track a person. To determine the group membership of a user, we leverage well-known web browser history stealing attacks. Thus, whenever a social network user visits a malicious website, this website can launch our de-anonymization attack and learn the identity of its visitors. The implications of our attack are manifold, since it requires a low effort and has the potential to affect millions of social networking users. We perform both a theoretical analysis and empirical measurements to demonstrate the feasibility of our attack against Xing, a medium-sized social network with more than eight million members that is mainly used for business relationships. Furthermore, we explored other, larger social networks and performed experiments that suggest that users of Facebook and LinkedIn are equally vulnerable.}, annote = {Demonstrates impact of history stealing attacks on user privacy} } @inproceedings{xuUCognitoPrivateBrowsing2015, title = {{{UCognito}}: {{Private Browsing}} without {{Tears}}}, shorttitle = {{{UCognito}}}, booktitle = {Proceedings of the 22nd {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}}, author = {Xu, Meng and Jang, Yeongjin and Xing, Xinyu and Kim, Taesoo and Lee, Wenke}, year = {2015}, month = oct, pages = {438--449}, abstract = {While private browsing is a standard feature, its implementation has been inconsistent among the major browsers. 
More seriously, it often fails to provide the adequate or even the intended privacy protection. For example, as shown in prior research, browser extensions and add-ons often undermine the goals of private browsing. In this paper, we first present our systematic study of private browsing. We developed a technical approach to identify browser traces left behind by a private browsing session, and showed that Chrome and Firefox do not correctly clear some of these traces. We analyzed the source code of these browsers and discovered that the current implementation approach is to decide the behaviors of a browser based on the current browsing mode (i.e., private or public); but such decision points are scattered throughout the code base. This implementation approach is very problematic because developers are prone to make mistakes given the complexities of browser components (including extensions and add-ons). Based on this observation, we propose a new and general approach to implement private browsing. The main idea is to overlay the actual filesystem with a sandbox filesystem when the browser is in private browsing mode, so that no unintended leakage is allowed and no persistent modification is stored. This approach requires no change to browsers and the OS kernel because the layered sandbox filesystem is implemented by interposing system calls. We have implemented a prototype system called Ucognito on Linux. Our evaluations show that Ucognito, when applied to Chrome and Firefox, stops all known privacy leaks identified by prior work and our current study. 
More importantly, Ucognito incurs only negligible performance overhead: e.g., 0\%-2.5\% in benchmarks for standard JavaScript and webpage loading.}, series = {{{CCS}} '15} } @misc{youtubeengineeringYouTubeNowDefaults2015, title = {{{YouTube}} Now Defaults to {{HTML5}} {$<$}video{$>$}}, author = {YouTube Engineering}, year = {2015}, month = jan, url = {https://youtube-eng.googleblog.com/2015/01/youtube-now-defaults-to-html5_27.html}, urldate = {2020-02-17}, abstract = {Four years ago, we wrote about YouTube's early support for the HTML5 {$<$}video{$>$} tag and how it performed compared to Flash. At the time, there...}, journal = {YouTube Engineering and Developers Blog}, note = {Accessed 2020-08-10} } @inproceedings{yuTrackingTrackers2016, title = {Tracking the {{Trackers}}}, booktitle = {Proceedings of the 25th {{International Conference}} on {{World Wide Web}}}, author = {Yu, Zhonghao and Macbeth, Sam and Modi, Konark and Pujol, Josep M.}, year = {2016}, month = apr, pages = {121--132}, abstract = {Online tracking poses a serious privacy challenge that has drawn significant attention in both academia and industry. Existing approaches for preventing user tracking, based on curated blocklists, suffer from limited coverage and coarse-grained resolution for classification, rely on exceptions that impact sites' functionality and appearance, and require significant manual maintenance. In this paper we propose a novel approach, based on the concepts leveraged from \$k\$-Anonymity, in which users collectively identify unsafe data elements, which have the potential to identify uniquely an individual user, and remove them from requests. We deployed our system to 200,000 German users running the Cliqz Browser or the Cliqz Firefox extension to evaluate its efficiency and feasibility. 
Results indicate that our approach achieves better privacy protection than blocklists, as provided by Disconnect, while keeping the site breakage to a minimum, even lower than the community-optimized AdBlock Plus. We also provide evidence of the prevalence and reach of trackers to over 21 million pages of 350,000 unique sites, the largest scale empirical evaluation to date. 95\% of the pages visited contain 3rd party requests to potential trackers and 78\% attempt to transfer unsafe data. Tracker organizations are also ranked, showing that a single organization can reach up to 42\% of all page visits in Germany.}, series = {{{WWW}} '16} } @misc{zypWindowNameTransport2008, title = {Window.Name {{Transport}}}, author = {Zyp, Kris}, year = {2008}, month = jul, url = {https://www.sitepen.com/blog/windowname-transport/}, urldate = {2020-08-10}, abstract = {The window.name transport is a new technique for secure cross-domain browser based data transfer, and can be utilized for creating secure mashups with untrusted sources. window.name is implemented in Dojo in the new dojox.io.windowName module, and it is very easy to make web services available through the window.name protocol. window.name works by loading a cross-domain \ldots}, journal = {SitePen}, note = {Accessed 2020-08-10} }