Finish opt-out/opt-in section

This commit is contained in:
Tobias Eidelpes 2020-07-07 19:56:48 +02:00
parent 02818278bd
commit 06168aaa0a
2 changed files with 36 additions and 147 deletions

View File

@ -48,11 +48,23 @@ European's \gls{GDPR} came into force in 2018, service providers operating in
the European Union are required to ask users for explicit consent before
collecting any data, except when that data is absolutely necessary to ensure
basic functionality. It is not allowed to notify the user that by continuing to
visit the web site, consent to data collection is given. Furthermore, if
consent is not given, the web site provider is not allowed to block the user
from visiting the web site. \citet{sanchez-rolaCanOptOut2019a} show, however,
that tracking is still prevalent and happens already before user consent is
given.
visit the web site, consent to data collection is given. Furthermore, if consent
is not given, the web site provider is not allowed to block the user from
visiting the web site. Even before the \gls{GDPR}, the EU required web sites to
ask for informed consent via the ePrivacy Directive, which came into force in
2013. \citet{trevisanYearsEUCookie2019} use their tool \emph{CookieCheck} to
evaluate how many of the surveyed 35,000 sites comply with the legislation put
forth in the ePrivacy Directive. Their findings indicate that almost half (49\%)
of the web sites use profiling technologies without consent. Similarly,
\citet{sanchez-rolaCanOptOut2019a} show that, one year after the \gls{GDPR} came
into force, tracking is still prevalent and happens even before user consent is
given. \citet{huCharacterisingThirdParty2019} come to a similar
conclusion while only looking at third-party tracking: the number of cookies
stored on a user's computer has not changed significantly since before the
\gls{GDPR}. In yet another survey of the top 500 web sites as ranked by Alexa,
\citet{degelingWeValueYour2019} conclude that the amount of tracking before and
after the \gls{GDPR} stayed the same and only 37 sites ask for consent before
storing any cookies.
\subsection{Clearing Browser History}
\label{subsec:Clearing Browser History}

View File

@ -7,7 +7,6 @@
year = {2014},
month = nov,
pages = {674--689},
address = {{Scottsdale, Arizona, USA}},
abstract = {We present the first large-scale studies of three advanced web tracking mechanisms - canvas fingerprinting, evercookies and use of "cookie syncing" in conjunction with evercookies. Canvas fingerprinting, a recently developed form of browser fingerprinting, has not previously been reported in the wild; our results show that over 5\% of the top 100,000 websites employ it. We then present the first automated study of evercookies and respawning and the discovery of a new evercookie vector, IndexedDB. Turning to cookie syncing, we present novel techniques for detection and analysing ID flows and we quantify the amplification of privacy-intrusive tracking practices due to cookie syncing. Our evaluation of the defensive techniques used by privacy-aware users finds that there exist subtle pitfalls --- such as failing to clear state on multiple browsers at once - in which a single lapse in judgement can shatter privacy defenses. This suggests that even sophisticated users face great difficulties in evading tracking techniques.},
series = {{{CCS}} '14}
}
@ -20,8 +19,7 @@
url = {https://theblog.adobe.com/adobe-flash-update/},
urldate = {2020-02-17},
abstract = {Adobe has long played a leadership role in advancing interactivity and creative content \textendash{} from video, to games and more \textendash{} on the web. Where we've seen a need to pus...},
journal = {Adobe Blog},
language = {en-US}
journal = {Adobe Blog}
}
@inproceedings{akkusNontrackingWebAnalytics2012,
@ -31,7 +29,6 @@
year = {2012},
month = oct,
pages = {687--698},
address = {{Raleigh, North Carolina, USA}},
abstract = {Today, websites commonly use third party web analytics services to obtain aggregate information about users that visit their sites. This information includes demographics and visits to other sites as well as user behavior within their own sites. Unfortunately, to obtain this aggregate information, web analytics services track individual user browsing behavior across the web. This violation of user privacy has been strongly criticized, resulting in tools that block such tracking as well as anti-tracking legislation and standards such as Do-Not-Track. These efforts, while improving user privacy, degrade the quality of web analytics. This paper presents the first design of a system that provides web analytics without tracking. The system gives users differential privacy guarantees, can provide better quality analytics than current services, requires no new organizational players, and is practical to deploy. This paper describes and analyzes the design, gives performance benchmarks, and presents our implementation and deployment across several hundred users.},
series = {{{CCS}} '12}
}
@ -53,7 +50,6 @@
volume = {19},
abstract = {From buying books to finding the perfect partner, we share our most intimate wants and needs with our favourite online systems. But how far should we accept promises of privacy in the face of perso...},
journal = {ACM Trans. Priv. Secur. TOPS},
language = {English},
number = {1}
}
@ -65,7 +61,6 @@
year = {2018},
month = apr,
pages = {1--6},
address = {{Porto, Portugal}},
abstract = {Personally Identifiable Information (PII) is information that can be used on its own or with other information to distinguish or trace an individual's identity. To investigate an application for PII tracking, a reverse engineer has to put considerable effort to reverse engineer an application and discover what an application does with PII. To automate this process and save reverse engineers substantial time and effort, we propose PIITracker which is a new and novel tool that can track PII automatically and capture if any processes are sending PII over the network. This is made possible by 1) whole-system dynamic information flow tracking 2) monitoring specific function and system calls. We analyzed 15 popular chat applications and browsers using PIITracker, and determined that 12 of these applications collect some form of PII.},
series = {{{EuroSec}}'18}
}
@ -90,8 +85,7 @@
url = {https://ashkansoltani.org/2011/08/11/respawn-redux-flash-cookies/},
urldate = {2019-08-22},
abstract = {A detailed technical followup to Flash Cookies and Privacy II, describing the mechanisms behind Hulu/KISSmetrics' respawning practices I thought I'd take the time to elaborate a bit fur\ldots},
journal = {Ashkan Soltani},
language = {English}
journal = {Ashkan Soltani}
}
@techreport{ayensonFlashCookiesPrivacy2011,
@ -100,12 +94,10 @@
author = {Ayenson, Mika D. and Wambach, Dietrich James and Soltani, Ashkan and Good, Nathan and Hoofnagle, Chris Jay},
year = {2011},
month = jul,
address = {{Rochester, NY}},
institution = {{Social Science Research Network}},
url = {https://papers.ssrn.com/abstract=1898390},
urldate = {2020-02-13},
abstract = {In August 2009, we demonstrated that popular websites were using ``Flash cookies'' to track users. Some advertisers had adopted this technology because it allowed persistent tracking even where users had taken steps to avoid web profiling. We also demonstrated ``respawning'' on top sites with Flash technology. This allowed sites to reinstantiate HTTP cookies deleted by a user, making tracking more resistant to users' privacy-seeking behaviors.},
language = {en},
number = {ID 1898390},
type = {{{SSRN Scholarly Paper}}}
}
@ -118,7 +110,6 @@
year = {2020},
month = apr,
pages = {1943--1954},
address = {{Taipei, Taiwan}},
abstract = {Website privacy policies sometimes provide users the option to opt-out of certain collections and uses of their personal data. Unfortunately, many privacy policies bury these instructions deep in their text, and few web users have the time or skill necessary to discover them. We describe a method for the automated detection of opt-out choices in privacy policy text and their presentation to users through a web browser extension. We describe the creation of two corpora of opt-out choices, which enable the training of classifiers to identify opt-outs in privacy policies. Our overall approach for extracting and classifying opt-out choices combines heuristics to identify commonly found opt-out hyperlinks with supervised machine learning to automatically identify less conspicuous instances. Our approach achieves a precision of 0.93 and a recall of 0.9. We introduce Opt-Out Easy, a web browser extension designed to present available opt-out choices to users as they browse the web. We evaluate the usability of our browser extension with a user study. We also present results of a large-scale analysis of opt-outs found in the text of thousands of the most popular websites.},
series = {{{WWW}} '20}
}
@ -143,7 +134,6 @@
url = {https://www.rfc-editor.org/info/rfc6265},
urldate = {2020-02-11},
abstract = {This document defines the HTTP Cookie and Set-Cookie header fields. These header fields can be used by HTTP servers to store state (called cookies) at HTTP user agents, letting the servers maintain a stateful session over the mostly stateless HTTP protocol. Although cookies have many historical infelicities that degrade their security and privacy, the Cookie and Set-Cookie header fields are widely used on the Internet. This document obsoletes RFC 2965.},
language = {en},
number = {6265},
type = {{{RFC}}}
}
@ -156,7 +146,6 @@
volume = {2018},
pages = {85--103},
journal = {PoPETs},
language = {English},
number = {4}
}
@ -169,8 +158,7 @@
urldate = {2019-08-14},
abstract = {Numerous surveys have shown that Web users are concerned about the loss of privacy associated with online tracking. Alarmingly, these surveys also reveal that people are also unaware of the amount of data sharing that occurs between ad exchanges, and thus underestimate the privacy risks associated with online tracking.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@misc{baumanEvercookieApplet2013,
@ -203,7 +191,6 @@
year = {2019},
month = jan,
pages = {213--221},
address = {{Melbourne VIC, Australia}},
abstract = {The overturning of the Internet Privacy Rules by the Federal Communications Commissions (FCC) in late March 2017 allows Internet Service Providers (ISPs) to collect, share and sell their customers' Web browsing data without their consent. With third-party trackers embedded on Web pages, this new rule has put user privacy under more risk. The need arises for users on their own to protect their Web browsing history from any potential adversaries. Although some available solutions such as Tor, VPN, and HTTPS can help users conceal their online activities, their use can also significantly hamper personalized online services, i.e., degraded utility. In this paper, we design an effective Web browsing history anonymization scheme, PBooster, aiming to protect users' privacy while retaining the utility of their Web browsing history. The proposed model pollutes users' Web browsing history by automatically inferring how many and what links should be added to the history while addressing the utility-privacy trade-off challenge. We conduct experiments to validate the quality of the manipulated Web browsing history and examine the robustness of the proposed approach for user privacy protection.},
series = {{{WSDM}} '19}
}
@ -241,7 +228,6 @@
url = {https://www.rfc-editor.org/info/rfc1738},
urldate = {2020-02-06},
abstract = {This document specifies a Uniform Resource Locator (URL), the syntax and semantics of formalized information for location and access of resources via the Internet.},
language = {en},
number = {1738},
type = {{{RFC}}}
}
@ -256,7 +242,6 @@
url = {https://www.rfc-editor.org/info/rfc1630},
urldate = {2020-02-06},
abstract = {This document defines the syntax used by the World-Wide Web initiative to encode the names and addresses of objects on the Internet. This memo provides information for the Internet community. This memo does not specify an Internet standard of any kind.},
language = {en},
number = {1630},
type = {{{RFC}}}
}
@ -268,7 +253,6 @@
year = {2017},
month = oct,
pages = {2607--2609},
address = {{Dallas, Texas, USA}},
abstract = {Billions of users browse the Web on a daily basis, leaving their digital traces on millions of websites. Every such visit, every mouse move or button click may trigger a wide variety of hidden data exchanges across multiple tracking companies. As a result, these companies collect a vast amount of user's data, preferences and habits, that are extremely useful for online advertisers and profitable for data brokers, however very worrisome for the privacy of the users. In this \textbackslash emph\{3-hours tutorial\} we will cover the vide variety of Web tracking technologies, ranging from simple cookies to advanced cross-device fingerprinting. We will describe the main mechanisms behind web tracking and what users can do to protect themselves. Moreover, we will discuss solutions Web developers can use to automatically eliminate tracking from the third-party content they include in their applications. This tutorial will be of interest to a \textbackslash emph\{general audience\} of computer scientists, and \textbackslash emph\{we do not require any specific prerequisite knowledge\} for attendees. We will cover the following tracking mechanisms: \textbackslash begin\{itemize\} \textbackslash item third-party cookie tracking, and other stateful tracking techniques that enables tracking across multiple websites, \textbackslash item cookie respawning that is used to re-create deleted user cookies, \textbackslash item cookie synching that allows trackers and ad agencies to synchronise user IDs across different companies, \textbackslash item browser fingerprinting, including Canvas, WebRTC and AudioContext fingerprinting \textbackslash item cross-browser device fingerprinting, allowing trackers to recognise users across several devices. \textbackslash end\{itemize\} We will then demonstrate prevalence of such techniques on the Web, based on previous research. 
We will present the advertisement ecosystem and explain how Web technologies are used in advertisement, in particular in Real-Time-Bidding (RTB). We will explain how cookie synching is used in RTB and present recent analysis on how much a user's tracking data is worth. We will discuss the mechanisms the website owners use to automatically interact with the ad agencies, and explain its consequences on user's security and privacy. To help users protect themselves from Web tracking, we will give an overview of existing solutions. We'll start with the browser settings, and show that basic third-party cookie tracking is still possible even in the private browser mode of most common Web browsers. We then present privacy-protecting browser extensions and compare how efficient they are in protection from Web tracking. Then, we'll present possible protection mechanisms based on browser randomisation to protect from advanced fingerprinting techniques. Finally, we will present solutions for Web developers, who want to include third-party content in their websites, but would like to automatically remove any tracking of their users. In particular, we will discuss simple solutions that exist today for social plugins integration, and propose more advanced server-side based solutions that are a result of our own research.},
series = {{{CCS}} '17}
}
@ -282,7 +266,6 @@
volume = {2017},
pages = {133--148},
journal = {PoPETs},
language = {English},
number = {2}
}
@ -295,7 +278,6 @@
pages = {15},
abstract = {Although the Adobe Flash browser plugin steadily lost popularity throughout the last few years, Flash content still regularly appears when browsing the web. Known for its infamous security track record, Flash remains a challenge in making web browsing more secure. In this paper, we present a largescale measurement of the current uses of Flash, based on a crawl of the top 1 million websites. The different types of measurements result in most detailed classification of Flash uses to date. In particular, special attention is payed to Flash usage related to user tracking, as well as to malicious Flash files used by malvertising or exploit kits. We present Garrick, a novel crawling framework, which is based on a full-fledged Mozilla Firefox browser. Garrick is able to mimic any browser, plugin and operating system configuration so that fingerprinting scripts can be tricked to deliver malicious Flash files. Our measurements show that Flash is still used by approximately 7.5\% of the top 1 million websites, with 62\% of the Flash content coming from third-parties such as ad networks. In general, on popular websites Flash usage is higher compared to less prominent websites and a bigger share of Flash content on these sites comes from third-parties. From a security perspective, malicious Flash files served by highly targeted malvertising campaigns are an ongoing challenge.},
journal = {Journal of Wireless Mobile Networks, Ubiquitous Computing, and Dependable Applications},
language = {en},
number = {4}
}
@ -329,7 +311,6 @@
year = {2016},
month = aug,
pages = {567--570},
address = {{Davis, California}},
abstract = {Third party tracking of user behavior via web cookies represents a privacy threat. In this paper we assess this threat through an analysis of anonymized, crowd-sourced cookie data provided by Cookiepedia.co.uk. We find that nearly 45\% of the cookies in the corpus are from Facebook and of the remaining cookies 25\% come from 10 distinct domains. Over 65\% are Maximal Permission cookies (i.e., 3rd party, non-secure, persistent, root-level). Cookiepedia's anonymization of user data presents challenges with respect to modeling site traffic. We further elucidate the privacy issue by conducting targeted crawling campaigns to supplement the Cookiepedia data. We find that the amount of traffic obscured by Cookiepedia's anonymizing procedure varies dramatically from site to site - sometimes obscuring as much as 80\% of traffic. We use the crawls to infer the inverse function of the anonymizing procedure, allowing us to enhance the crowd-sourced dataset while maintaining user anonymity.},
series = {{{ASONAM}} '16}
}
@ -366,8 +347,7 @@
year = {2019},
abstract = {The European Union's General Data Protection Regulation (GDPR) went into effect on May 25, 2018. Its privacy regulations apply to any service and company collecting or processing personal data in Europe. Many companies had to adjust their data handling processes, consent forms, and privacy policies to comply with the GDPR's transparency requirements. We monitored this rare event by analyzing changes on popular websites in all 28 member states of the European Union. For each country, we periodically examined its 500 most popular websites \textendash{} 6,579 in total \textendash{} for the presence of and updates to their privacy policy between December 2017 and October 2018. While many websites already had privacy policies, we find that in some countries up to 15.7 \% of websites added new privacy policies by May 25, 2018, resulting in 84.5 \% of websites having privacy policies. 72.6 \% of websites with existing privacy policies updated them close to the date. After May this positive development slowed down noticeably. Most visibly, 62.1 \% of websites in Europe now display cookie consent notices, 16 \% more than in January 2018. These notices inform users about a site's cookie use and user tracking practices. We categorized all observed cookie consent notices and evaluated 28 common implementations with respect to their technical realization of cookie consent. Our analysis shows that core web security mechanisms such as the same-origin policy pose problems for the implementation of consent according to GDPR rules, and opting out of third-party cookies requires the third party to cooperate. Overall, we conclude that the web became more transparent at the time GDPR came into force, but there is still a lack of both functional and usable mechanisms for users to consent to or deny processing of their personal data on the Internet.},
archiveprefix = {arXiv},
journal = {Proc. 2019 Netw. Distrib. Syst. Secur. Symp.},
language = {English}
journal = {Proc. 2019 Netw. Distrib. Syst. Secur. Symp.}
}
@article{enckTaintDroidInformationFlowTracking2014,
@ -391,7 +371,6 @@
year = {2015},
month = may,
pages = {289--299},
address = {{Florence, Italy}},
abstract = {We study the ability of a passive eavesdropper to leverage "third-party" HTTP tracking cookies for mass surveillance. If two web pages embed the same tracker which tags the browser with a unique cookie, then the adversary can link visits to those pages from the same user (i.e., browser instance) even if the user's IP address varies. Further, many popular websites leak a logged-in user's identity to an eavesdropper in unencrypted traffic. To evaluate the effectiveness of our attack, we introduce a methodology that combines web measurement and network measurement. Using OpenWPM, our web privacy measurement platform, we simulate users browsing the web and find that the adversary can reconstruct 62-73\% of a typical user's browsing history. We then analyze the effect of the physical location of the wiretap as well as legal restrictions such as the NSA's "one-end foreign" rule. Using measurement units in various locations - Asia, Europe, and the United States - we show that foreign users are highly vulnerable to the NSA's dragnet surveillance due to the concentration of third-party trackers in the U.S. Finally, we find that some browser-based privacy tools mitigate the attack while others are largely ineffective.},
series = {{{WWW}} '15}
}
@ -404,7 +383,6 @@
year = {2016},
month = oct,
pages = {1388--1401},
address = {{Vienna, Austria}},
abstract = {We present the largest and most detailed measurement of online tracking conducted to date, based on a crawl of the top 1 million websites. We make 15 types of measurements on each site, including stateful (cookie-based) and stateless (fingerprinting-based) tracking, the effect of browser privacy tools, and the exchange of tracking data between different sites ("cookie syncing"). Our findings include multiple sophisticated fingerprinting techniques never before measured in the wild. This measurement is made possible by our open-source web privacy measurement tool, OpenWPM, which uses an automated version of a full-fledged consumer browser. It supports parallelism for speed and scale, automatic recovery from failures of the underlying browser, and comprehensive browser instrumentation. We demonstrate our platform's strength in enabling researchers to rapidly detect, quantify, and characterize emerging online tracking behaviors.},
series = {{{CCS}} '16}
}
@ -416,9 +394,7 @@
editor = {Karagiannis, Thomas and Dimitropoulos, Xenofontas},
year = {2016},
pages = {30--41},
address = {{Heraklion, Greece}},
abstract = {User tracking has become de facto practice of the Web, however, our understanding of the scale and nature of this practice remains rudimentary. In this paper, we explore the connections amongst all parties of the Web, especially focusing on how trackers share user IDs. Using data collected from both browsing histories of 129 users and active experiments, we identify user-specific IDs that we suspect are used to track users. We find a significant amount of ID-sharing practices across different organisations providing various service categories. Our observations reveal that ID-sharing happens in a large scale regardless of the user profile size and profile condition such as logged-in and logged-out. We unexpectedly observe a higher number of ID-sharing domains when user is logged-out. We believe that our work reveals the huge gap between what is known about user tracking and what is done by this complex and important ecosystem.},
language = {en},
series = {Lecture {{Notes}} in {{Computer Science}}}
}
@ -429,7 +405,6 @@
year = {2000},
month = nov,
pages = {25--32},
address = {{Athens, Greece}},
series = {{{CCS}} '00}
}
@ -453,8 +428,7 @@
year = {2018},
pages = {151--168},
url = {https://www.usenix.org/conference/usenixsecurity18/presentation/franken},
urldate = {2020-02-05},
language = {en}
urldate = {2020-02-05}
}
@misc{frankSessionVariablesCookies2008,
@ -463,8 +437,7 @@
year = {2008},
month = jan,
url = {https://www.thomasfrank.se/sessionvars.html},
urldate = {2020-02-10},
language = {en}
urldate = {2020-02-10}
}
@article{gerberInvestigatingPeoplePrivacy2019,
@ -475,7 +448,6 @@
volume = {2019},
pages = {267--288},
journal = {Proc. Priv. Enhancing Technol.},
language = {English},
number = {3}
}
@ -511,7 +483,6 @@
volume = {2015},
pages = {282--298},
journal = {PoPETs},
language = {English},
number = {2}
}
@ -523,7 +494,6 @@
year = {2020},
month = apr,
pages = {1--12},
address = {{Honolulu, HI, USA}},
abstract = {We conducted an in-lab user study with 24 participants to explore the usefulness and usability of privacy choices offered by websites. Participants were asked to find and use choices related to email marketing, targeted advertising, or data deletion on a set of nine websites that differed in terms of where and how these choices were presented. They struggled with several aspects of the interaction, such as selecting the correct page from a site's navigation menu and understanding what information to include in written opt-out requests. Participants found mechanisms located in account settings pages easier to use than options contained in privacy policies, but many still consulted help pages or sent email to request assistance. Our findings indicate that, despite their prevalence, privacy choices like those examined in this study are difficult for consumers to exercise in practice. We provide design and policy recommendations for making these website opt-out and deletion choices more useful and usable for consumers.},
series = {{{CHI}} '20}
}
@ -544,8 +514,7 @@
pages = {137--141},
abstract = {The recently introduced General Data Protection Regulation (GDPR) requires that when obtaining information online that could be used to identify individuals, their consents must be obtained. Among other things, this affects many common forms of cookies, and users in the EU have been presented with notices asking their approvals for data collection. This paper examines the prevalence of third party cookies before and after GDPR by using two datasets: accesses to top 500 websites according to Alexa.com, and weekly data of cookies placed in users' browsers by websites accessed by 16 UK and China users across one year.},
archiveprefix = {arXiv},
journal = {Proc. 10th ACM Conf. Web Sci. - WebSci 19},
language = {English}
journal = {Proc. 10th ACM Conf. Web Sci. - WebSci 19}
}
@article{ikramSeamlessTrackingFreeWeb2016,
@ -578,7 +547,6 @@
year = {2018},
month = oct,
pages = {329--342},
address = {{Boston, MA, USA}},
abstract = {A tracking flow is a flow between an end user and a Web tracking service. We develop an extensive measurement methodology for quantifying at scale the amount of tracking flows that cross data protection borders, be it national or international, such as the EU28 border within which the General Data Protection Regulation (GDPR) applies. Our methodology uses a browser extension to fully render advertising and tracking code, various lists and heuristics to extract well known trackers, passive DNS replication to get all the IP ranges of trackers, and state-of-the art geolocation. We employ our methodology on a dataset from 350 real users of the browser extension over a period of more than four months, and then generalize our results by analyzing billions of web tracking flows from more than 60 million broadband and mobile users from 4 large European ISPs. We show that the majority of tracking flows cross national borders in Europe but, unlike popular belief, are pretty well confined within the larger GDPR jurisdiction. Simple DNS redirection and PoP mirroring can increase national confinement while sealing almost all tracking flows within Europe. Last, we show that cross boarder tracking is prevalent even in sensitive and hence protected data categories and groups including health, sexual orientation, minors, and others.},
series = {{{IMC}} '18}
}
@ -592,8 +560,7 @@
urldate = {2019-08-14},
abstract = {We turn our attention to the elephant in the room of data protection, which is none other than the simple and obvious question: ``Who's tracking sensitive domains?''. Despite a fast-growing amount of work on more complex facets of the interplay between privacy and the business models of the Web, the obvious question of who collects data on domains where most people would prefer not be seen, has received rather limited attention. First, we develop a methodology for automatically annotating websites that belong to a sensitive category, e.g., as defined by the General Data Protection Regulation (GDPR). Then, we extract the third party tracking services included directly, or via recursive inclusions, by the above mentioned sites. Having analyzed around 30k sensitive domains, we show that such domains are tracked, albeit less intensely than the mainstream ones. Looking in detail at the tracking services operating on them, we find well known names, as well as some less known ones, including some specializing on specific sensitive categories.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@article{iqbalAdGraphGraphBasedApproach2018,
@ -606,8 +573,7 @@
urldate = {2019-08-14},
abstract = {User demand for blocking advertising and tracking online is large and growing. Existing tools, both deployed and described in research, have proven useful, but lack either the completeness or robustness needed for a general solution. Existing detection approaches generally focus on only one aspect of advertising or tracking (e.g. URL patterns, code structure), making existing approaches susceptible to evasion.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@inproceedings{iqbalAdWarsRetrospective2017,
@ -618,7 +584,6 @@
year = {2017},
month = nov,
pages = {171--183},
address = {{London, United Kingdom}},
abstract = {The increasing popularity of adblockers has prompted online publishers to retaliate against adblock users by deploying anti-adblock scripts, which detect adblock users and bar them from accessing content unless they disable their adblocker. To circumvent anti-adblockers, adblockers rely on manually curated anti-adblock filter lists for removing anti-adblock scripts. Anti-adblock filter lists currently rely on informal crowdsourced feedback from users to add/remove filter list rules. In this paper, we present the first comprehensive study of anti-adblock filter lists to analyze their effectiveness against anti-adblockers. Specifically, we compare and contrast the evolution of two popular anti-adblock filter lists. We show that these filter lists are implemented very differently even though they currently have a comparable number of filter list rules. We then use the Internet Archive's Wayback Machine to conduct a retrospective coverage analysis of these filter lists on Alexa top-5K websites over the span of last five years. We find that the coverage of these filter lists has considerably improved since 2014 and they detect anti-adblockers on about 9\% of Alexa top-5K websites. To improve filter list coverage and speedup addition of new filter rules, we also design and implement a machine learning based method to automatically detect anti-adblock scripts using static JavaScript code analysis.},
series = {{{IMC}} '17}
}
@ -630,7 +595,6 @@
year = {2006},
month = may,
pages = {737--744},
address = {{Edinburgh, Scotland}},
abstract = {Through a variety of means, including a range of browser cache methods and inspecting the color of a visited hyperlink, client-side browser state can be exploited to track users against their wishes. This tracking is possible because persistent, client-side browser state is not properly partitioned on per-site basis in current browsers. We address this problem by refining the general notion of a "same-origin" policy and implementing two browser extensions that enforce this policy on the browser cache and visited links.We also analyze various degrees of cooperation between sites to track users, and show that even if long-term browser state is properly partitioned, it is still possible for sites to use modern web features to bounce users between sites and invisibly engage in cross-domain tracking of their visitors. Cooperative privacy attacks are an unavoidable consequence of all persistent browser state that affects the behavior of the browser, and disabling or frequently expiring this state is the only way to achieve true privacy against colluding parties.},
series = {{{WWW}} '06}
}
@ -642,9 +606,7 @@
editor = {Gritzalis, Dimitris and Preneel, Bart and Theoharidou, Marianthi},
year = {2010},
pages = {215--231},
address = {{Berlin, Heidelberg}},
abstract = {Web browser history detection using CSS visited styles has long been dismissed as an issue of marginal impact. However, due to recent changes in Web usage patterns, coupled with browser performance improvements, the long-standing issue has now become a significant threat to the privacy of Internet users.In this paper we analyze the impact of CSS-based history detection and demonstrate the feasibility of conducting practical attacks with minimal resources. We analyze Web browser behavior and detectability of content loaded via standard protocols and with various HTTP response codes. We develop an algorithm for efficient examination of large link sets and evaluate its performance in modern browsers. Compared to existing methods our approach is up to 6 times faster, and is able to detect up to 30,000 visited links per second.We present a novel Web application capable of effectively detecting clients' browsing histories and discuss real-world results obtained from 271,576 Internet users. Our results indicate that at least 76\% of Internet users are vulnerable to history detection, including over 94\% of Google Chrome users; for a test of most popular Internet websites we were able to detect, on average, 62.6 (median 22) visited locations per client. We also demonstrate the potential to profile users based on social news stories they visited, and to detect private data such as zipcodes or search queries typed into online forms.},
language = {en},
series = {Lecture {{Notes}} in {{Computer Science}}}
}
@ -669,7 +631,6 @@
year = {2016},
month = nov,
pages = {111--117},
address = {{Santa Monica, California, USA}},
abstract = {Users on today's Internet are subjected to a barrage of advertising and privacy concerning practices. However there is a gap in understanding the treatment of not-logged-in and logged-in users on websites. To address this gap in understanding, we create accounts and crawl 345 popular websites from 14 Alexa website categories while both not-logged-in and logged-in to determine -- for the first time -- how users are treated when logged-in versus not-logged-in with respect to the types and rate of ads and privacy concerns. We establish that logged-in users are treated to more ads and more privacy concerns on average and the website category greatly impacts the amount of ads/concerns users are subjected to. We also note that 42\% of websites crawled leaked PII and identify that age/gender/zipcode are more valued than user/first/last name personal information. Finally, we observe that 463 unique third parties received at least one piece of PII, indicating that user PII is leaked more aggressively than previously known.},
series = {{{IMC}} '16}
}
@ -720,23 +681,10 @@
primaryClass = {cs}
}
@article{kelbertDataUsageControl2018,
title = {Data {{Usage Control}} for {{Distributed Systems}}},
author = {Kelbert, Florian and Pretschner, Alexander},
year = {2018},
month = apr,
volume = {21},
pages = {12:1--12:32},
abstract = {Data usage control enables data owners to enforce policies over how their data may be used after they have been released and accessed. We address distributed aspects of this problem, which arise if the protected data reside within multiple systems. We contribute by formalizing, implementing, and evaluating a fully decentralized system that (i) generically and transparently tracks protected data across systems, (ii) propagates data usage policies along, and (iii) efficiently and preventively enforces policies in a decentralized manner. The evaluation shows that (i) dataflow tracking and policy propagation achieve a throughput of 21\textendash 54\% of native execution and (ii) decentralized policy enforcement outperforms a centralized approach in many situations.},
journal = {ACM Transactions on Privacy and Security},
number = {3}
}
@article{kitchenhamProceduresPerformingSystematic,
title = {Procedures for {{Performing Systematic Reviews}}},
author = {Kitchenham, Barbara},
pages = {33},
language = {English}
}
@inproceedings{kleinDNSCacheBasedUser2019,
@ -744,17 +692,14 @@
booktitle = {Proceedings 2019 {{Network}} and {{Distributed System Security Symposium}}},
author = {Klein, Amit and Pinkas, Benny},
year = {2019},
address = {{San Diego, CA}},
abstract = {We describe a novel user tracking technique that is based on assigning statistically unique DNS records per user. This new tracking technique is unique in being able to distinguish between machines that have identical hardware and software, and track users even if they use ``privacy mode'' browsing, or use multiple browsers (on the same machine).},
language = {en}
abstract = {We describe a novel user tracking technique that is based on assigning statistically unique DNS records per user. This new tracking technique is unique in being able to distinguish between machines that have identical hardware and software, and track users even if they use ``privacy mode'' browsing, or use multiple browsers (on the same machine).}
}
@article{kontaxisTrackingProtectionFirefox,
title = {Tracking {{Protection}} in {{Firefox For Privacy}} and {{Performance}}},
author = {Kontaxis, Georgios and Chew, Monica},
pages = {4},
abstract = {We present Tracking Protection in the Mozilla Firefox web browser. Tracking Protection is a new privacy technology to mitigate invasive tracking of users' online activity by blocking requests to tracking domains. We evaluate our approach and demonstrate a 67.5\% reduction in the number of HTTP cookies set during a crawl of the Alexa top 200 news sites. Since Firefox does not download and render content from tracking domains, Tracking Protection also enjoys performance benefits of a 44\% median reduction in page load time and 39\% reduction in data usage in the Alexa top 200 news sites.},
language = {en}
}
@article{krishnamurthyLeakagePersonallyIdentifiable2010,
@ -797,7 +742,6 @@
url = {https://www.rfc-editor.org/info/rfc2109},
urldate = {2020-02-11},
abstract = {This document specifies a way to create a stateful session with HTTP requests and responses. It describes two new headers, Cookie and Set- Cookie, which carry state information between participating origin servers and user agents. The method described here differs from Netscape's Cookie proposal, but it can interoperate with HTTP/1.0 user agents that use Netscape's method.},
language = {en},
number = {2109},
type = {{{RFC}}}
}
@ -812,7 +756,6 @@
url = {https://www.rfc-editor.org/info/rfc2965},
urldate = {2020-02-11},
abstract = {This document specifies a way to create a stateful session with Hypertext Transfer Protocol (HTTP) requests and responses.},
language = {en},
number = {2965},
type = {{{RFC}}}
}
@ -826,7 +769,6 @@
pages = {105--125},
abstract = {Many anonymous communication networks (ACNs) with different privacy goals have been developed. Still, there are no accepted formal definitions of privacy goals, and ACNs often define their goals ad hoc. However, the formal definition of privacy goals benefits the understanding and comparison of different flavors of privacy and, as a result, the improvement of ACNs. In this paper, we work towards defining and comparing privacy goals by formalizing them as privacy notions and identifying their building blocks. For any pair of notions we prove whether one is strictly stronger, and, if so, which. Hence, we are able to present a complete hierarchy. Using this rigorous comparison between notions, we revise inconsistencies between the existing works and improve the understanding of privacy goals.},
journal = {Proc. Priv. Enhancing Technol.},
language = {English},
number = {2}
}
@ -838,7 +780,6 @@
year = {2012},
month = may,
pages = {589--598},
address = {{Austin, Texas, USA}},
abstract = {We present results of a 45-participant laboratory study investigating the usability of nine tools to limit online behavioral advertising (OBA). We interviewed participants about OBA and recorded their behavior and attitudes as they configured and used a privacy tool, such as a browser plugin that blocks requests to specific URLs, a tool that sets browser cookies indicating a user's preference to opt out of OBA, or the privacy settings built into a web browser. We found serious usability flaws in all tools we tested. Participants found many tools difficult to configure, and tools' default settings were often minimally protective. Ineffective communication, confusing interfaces, and a lack of feedback led many participants to conclude that a tool was blocking OBA when they had not properly configured it to do so. Without being familiar with many advertising companies and tracking technologies, it was difficult for participants to use the tools effectively.},
series = {{{CHI}} '12}
}
@ -851,7 +792,6 @@
year = {2016},
month = nov,
pages = {365--372},
address = {{Santa Monica, California, USA}},
abstract = {Many popular, free online services provide cross-platform interfaces via Web browsers as well as apps on iOS and Android. To monetize these services, many additionally include tracking and advertising libraries that gather information about users with significant privacy implications. Given that the Web-based and mobile-app-based ecosystems evolve independently, an important open question is how these platforms compare with respect to user privacy. In this paper, we conduct the first head-to-head study of 50 popular, free online services to understand which is better for privacy---Web or app? We conduct manual tests, extract personally identifiable information (PII) shared over plaintext and encrypted connections, and analyze the data to understand differences in user-data collection across platforms for the same service. While we find that all platforms expose users' data, there are still opportunities to significantly limit how much information is shared with other parties by selectively using the app or Web version of a service.},
series = {{{IMC}} '16}
}
@ -876,9 +816,7 @@
editor = {Mirkovic, Jelena and Liu, Yong},
year = {2015},
pages = {277--289},
address = {{Cham}},
abstract = {Even though most web users assume that only the websites that they visit directly become aware of the visit, this belief is incorrect. Many website display contents hosted externally by third-party websites, which can track users and become aware of their web-surfing behavior. This phenomenon is called third-party tracking, and although such activities violate no law, they raise privacy concerns because the tracking is carried out without users' knowledge or explicit approval. Our work provides a systematic study of the third-party tracking phenomenon. First, we develop TrackAdvisor, arguably the first method that utilizes Machine Learning to identify the HTTP requests carrying sensitive information to third-party trackers with very high accuracy (100 \% Recall and 99.4 Precision). Microsoft's Tracking Protection Lists, which is a widely-used third-party tracking blacklist achieves only a Recall of 72.2 \%. Second, we quantify the pervasiveness of the third-party tracking phenomenon: 46 \% of the home pages of the websites in Alexa Global Top 10,000 have at least one third-party tracker, and Google, using third-party tracking, monitors 25 \% of these popular websites. Our overarching goal is to measure accurately how widespread third-party tracking is and hopefully would raise the public awareness to its potential privacy risks.},
language = {en},
series = {Lecture {{Notes}} in {{Computer Science}}}
}
@ -890,7 +828,6 @@
year = {2011},
month = nov,
pages = {61--70},
address = {{Berlin, Germany}},
abstract = {The sharing of personal data has emerged as a popular activity over online social networking sites like Facebook. As a result, the issue of online social network privacy has received significant attention in both the research literature and the mainstream media. Our overarching goal is to improve defaults and provide better tools for managing privacy, but we are limited by the fact that the full extent of the privacy problem remains unknown; there is little quantification of the incidence of incorrect privacy settings or the difficulty users face when managing their privacy. In this paper, we focus on measuring the disparity between the desired and actual privacy settings, quantifying the magnitude of the problem of managing privacy. We deploy a survey, implemented as a Facebook application, to 200 Facebook users recruited via Amazon Mechanical Turk. We find that 36\% of content remains shared with the default privacy settings. We also find that, overall, privacy settings match users' expectations only 37\% of the time, and when incorrect, almost always expose content to more users than expected. Finally, we explore how our results have potential to assist users in selecting appropriate privacy settings by examining the user-created friend lists. We find that these have significant correlation with the social network, suggesting that information from the social network may be helpful in implementing new tools for managing privacy.},
series = {{{IMC}} '11}
}
@ -903,7 +840,6 @@
year = {2013},
month = nov,
pages = {279--284},
address = {{Berlin, Germany}},
abstract = {The task of protecting users' privacy is made more difficult by their attitudes towards information disclosure without full awareness and the economics of the tracking and advertising industry. Even after numerous press reports and widespread disclosure of leakages on the Web and on popular Online Social Networks, many users appear not be fully aware of the fact that their information may be collected, aggregated and linked with ambient information for a variety of purposes. Past attempts at alleviating this problem have addressed individual aspects of the user's data collection. In this paper we move towards a comprehensive and efficient client-side tool that maximizes users' awareness of the extent of their information leakage. We show that such a customizable tool can help users to make informed decisions on controlling their privacy footprint.},
series = {{{WPES}} '13}
}
@ -915,23 +851,10 @@
year = {2018},
month = oct,
pages = {2243--2245},
address = {{Toronto, Canada}},
abstract = {Online tracking of children by third-parties is strictly regulated by law in many regions of the world (e.g., COPPA in USA and GDPR in EU), and in a large number of situations constitutes criminal activity. Unfortunately, the existence of these laws does not seem to be an effective deterrence. In this paper, we provide a brief summary of our findings pertaining to the effectiveness of four popular browser add-ons in protecting against third-party tracking on a select number of children-oriented Web-sites. The obtain results show that protection from tracking by a browser add-on is generally achieved at the expense of Web-page performance. In other words, add-ons that are effective at blocking third-party trackers will often adversely affect the normal functioning of the visited Web-page(s). In addition, our results also show that when it comes to user/children tracking by well-known 'tech giants', all four add-ons are likely to provide only limited protection.},
series = {{{CCS}} '18}
}
@article{mavroudisPrivacySecurityUltrasound2017,
  author   = {Mavroudis, Vasilios and Hao, Shuang and Fratantonio, Yanick and Maggi, Federico and Kruegel, Christopher and Vigna, Giovanni},
  title    = {On the {{Privacy}} and {{Security}} of the {{Ultrasound Ecosystem}}},
  journal  = {PoPETs},
  year     = {2017},
  month    = apr,
  volume   = {2017},
  number   = {2},
  pages    = {95--112},
  language = {English}
}
@inproceedings{mayerThirdPartyWebTracking2012,
title = {Third-{{Party Web Tracking}}: {{Policy}} and {{Technology}}},
shorttitle = {Third-{{Party Web Tracking}}},
@ -973,7 +896,6 @@
volume = {2017},
pages = {130--146},
journal = {PoPETs},
language = {English},
number = {3}
}
@ -994,7 +916,6 @@
volume = {2018},
pages = {5--32},
journal = {PoPETs},
language = {English},
number = {4}
}
@ -1004,9 +925,7 @@
author = {Olejnik, Lukasz and Tran, Minh-Dung and Castelluccia, Claude},
year = {2014},
month = feb,
address = {{San Diego, CA}},
abstract = {Real-Time Bidding (RTB) and Cookie Matching (CM) are transforming the advertising landscape to an extremely dynamic market and make targeted advertising considerably permissive. The emergence of these technologies allows companies to exchange user data as a product and therefore raises important concerns from privacy perspectives. In this paper, we perform a privacy analysis of CM and RTB and quantify the leakage of users' browsing histories due to these mechanisms. We study this problem on a corpus of users' Web histories, and show that using these technologies, certain companies can significantly improve their tracking and profiling capabilities. We detect 41 companies serving ads via RTB and over 125 using Cookie Matching. We show that 91\% of users in our dataset were affected by CM and in certain cases, 27\% of users' Web browsing histories could be leaked to 3rd-party companies through RTB. We expose a design characteristic of RTB systems to observe the prices which advertisers pay for serving ads to Web users. We leverage this feature and provide important insights into these prices by analyzing different user profiles and visiting contexts. Our study shows the variation of prices according to context information including visiting site, time and user's physical location. We experimentally confirm that users with known Web browsing history are evaluated higher than new comers, that some user profiles are more valuable than others, and that users' intents, such as looking for a commercial product, are sold at higher prices than users' Web browsing histories. In addition, we show that there is a huge gap between users' perception of the value of their personal information and its actual value on the market. A recent study by Carrascal et al. showed that, on average, users evaluate the price of the disclosure of their presence on a Web site to EUR 7. We show that user's Web browsing history elements are routinely being sold off for less than \$0.0005.},
language = {en}
abstract = {Real-Time Bidding (RTB) and Cookie Matching (CM) are transforming the advertising landscape to an extremely dynamic market and make targeted advertising considerably permissive. The emergence of these technologies allows companies to exchange user data as a product and therefore raises important concerns from privacy perspectives. In this paper, we perform a privacy analysis of CM and RTB and quantify the leakage of users' browsing histories due to these mechanisms. We study this problem on a corpus of users' Web histories, and show that using these technologies, certain companies can significantly improve their tracking and profiling capabilities. We detect 41 companies serving ads via RTB and over 125 using Cookie Matching. We show that 91\% of users in our dataset were affected by CM and in certain cases, 27\% of users' Web browsing histories could be leaked to 3rd-party companies through RTB. We expose a design characteristic of RTB systems to observe the prices which advertisers pay for serving ads to Web users. We leverage this feature and provide important insights into these prices by analyzing different user profiles and visiting contexts. Our study shows the variation of prices according to context information including visiting site, time and user's physical location. We experimentally confirm that users with known Web browsing history are evaluated higher than new comers, that some user profiles are more valuable than others, and that users' intents, such as looking for a commercial product, are sold at higher prices than users' Web browsing histories. In addition, we show that there is a huge gap between users' perception of the value of their personal information and its actual value on the market. A recent study by Carrascal et al. showed that, on average, users evaluate the price of the disclosure of their presence on a Web site to EUR 7. We show that user's Web browsing history elements are routinely being sold off for less than \$0.0005.}
}
@inproceedings{papadopoulosCookieSynchronizationEverything2019,
@ -1017,7 +936,6 @@
year = {2019},
month = may,
pages = {1432--1442},
address = {{San Francisco, CA, USA}},
abstract = {User data is the primary input of digital advertising, fueling the free Internet as we know it. As a result, web companies invest a lot in elaborate tracking mechanisms to acquire user data that can sell to data markets and advertisers. However, with same-origin policy and cookies as a primary identification mechanism on the web, each tracker knows the same user with a different ID. To mitigate this, Cookie Synchronization (CSync) came to the rescue, facilitating an information sharing channel between 3rd-parties that may or not have direct access to the website the user visits. In the background, with CSync, they merge user data they own, but also reconstruct a user's browsing history, bypassing the same origin policy. In this paper, we perform a first to our knowledge in-depth study of CSync in the wild, using a year-long weblog from 850 real mobile users. Through our study, we aim to understand the characteristics of the CSync protocol and the impact it has on web users' privacy. For this, we design and implement CONRAD, a holistic mechanism to detect CSync events at real time, and the privacy loss on the user side, even when the synced IDs are obfuscated. Using CONRAD, we find that 97\% of the regular web users are exposed to CSync: most of them within the first week of their browsing, and the median userID gets leaked, on average, to 3.5 different domains. Finally, we see that CSync increases the number of domains that track the user by a factor of 6.75.},
series = {{{WWW}} '19}
}
@ -1030,7 +948,6 @@
year = {2018},
month = apr,
pages = {1--6},
address = {{Porto, Portugal}},
abstract = {In recent years, and after the Snowden revelations, there has been a significant movement in the web from organizations, policymakers and individuals to enhance the privacy awareness among users. As a consequence, more and more publishers support TLS in their websites, and vendors provide privacy and anonymity tools, such as secure VPNs or Tor onions, to cover the need of users for privacy-preserving web browsing. But is the sporadic appliance of such tools enough to provide privacy? In this paper, we describe two privacy-breaching threats against users accessing the Internet over a secure VPN. The breaches are made possible through Cookie Synchronization, nowadays widely used by third parties for advertisement and tracking purposes. The generated privacy leaks can be used by a snooping entity such as an ISP, to re-identify a user in the web and reveal their browsing history even when users are hidden behind a VPN. By probing the top 12K Alexa sites, we find that 1 out of 13 websites expose their users to these privacy leaks.},
series = {{{EuroSec}}'18}
}
@ -1043,7 +960,6 @@
year = {2013},
month = dec,
pages = {49--58},
address = {{New Orleans, Louisiana, USA}},
abstract = {Over the past few years, microblogging social networking services have become a popular means for information sharing and communication. Besides sharing information among friends, such services are currently being used by artists, politicians, news channels, and information providers to easily communicate with their constituency. Even though following specific channels on a microblogging service enables users to receive interesting information in a timely manner, it may raise significant privacy concerns as well. For example, the microblogging service is able to observe all the channels that a particular user follows. This way, it can infer all the subjects a user might be interested in and generate a detailed profile of this user. This knowledge can be used for a variety of purposes that are usually beyond the control of the users. To address these privacy concerns, we propose k-subscription: an obfuscation-based approach that enables users to follow privacy-sensitive channels, while, at the same time, making it difficult for the microblogging service to find out their actual interests. Our method relies on obfuscation: in addition to each privacy-sensitive channel, users are encouraged to randomly follow k -- 1 other channels they are not interested in. In this way (i) their actual interests are hidden in random selections, and (ii) each user contributes in hiding the real interests of other users. Our analysis indicates that k-subscription makes it difficult for attackers to pinpoint a user's interests with significant confidence. We show that this confidence can be made predictably small by slightly adjusting k while adding a reasonably low overhead on the user's system.},
series = {{{ACSAC}} '13}
}
@ -1063,7 +979,6 @@
year = {2015},
month = oct,
pages = {93--106},
address = {{Tokyo, Japan}},
abstract = {Content and services which are offered for free on the Internet are primarily monetized through online advertisement. This business model relies on the implicit agreement between content providers and users where viewing ads is the price for the "free" content. This status quo is not acceptable to all users, however, as manifested by the rise of ad-blocking plugins which are available for all popular Web browsers. Indeed, ad-blockers have the potential to substantially disrupt the widely established business model of "free" content, currently one of the core elements on which the Web is built. In this work, we shed light on how users interact with ads. We show how to leverage the functionality of AdBlock Plus, one of the most popular ad-blockers to identify ad traffic from passive network measurements. We complement previous work, which focuses on active measurements, by characterizing ad-traffic in the wild, i.e., as seen in a residential broadband network of a major European ISP. Finally, we assess the prevalence of ad-blockers in this particular network and discuss possible implications for content providers and ISPs.},
series = {{{IMC}} '15}
}
@ -1085,7 +1000,6 @@
year = {2011},
month = oct,
pages = {667--676},
address = {{Chicago, Illinois, USA}},
abstract = {Online tracking of users in support of behavioral advertising is widespread. Several researchers have proposed non-tracking online advertising systems that go well beyond the requirements of the Do-Not-Track initiative launched by the US Federal Trace Commission (FTC). The primary goal of these systems is to allow for behaviorally targeted advertising without revealing user behavior (clickstreams) or user profiles to the ad network. Although these designs purport to be practical solutions, none of them adequately consider the role of the ad auctions, which today are central to the operation of online advertising systems. This paper looks at the problem of running auctions that leverage user profiles for ad ranking while keeping the user profile private. We define the problem, broadly explore the solution space, and discuss the pros and cons of these solutions. We analyze the performance of our solutions using data from Microsoft Bing advertising auctions. We conclude that, while none of our auctions are ideal in all respects, they are adequate and practical solutions.},
series = {{{CCS}} '11}
}
@ -1097,7 +1011,6 @@
year = {2012},
month = apr,
pages = {12},
address = {{San Jose, CA}},
abstract = {While third-party tracking on the web has garnered much attention, its workings remain poorly understood. Our goal is to dissect how mainstream web tracking occurs in the wild. We develop a client-side method for detecting and classifying five kinds of third-party trackers based on how they manipulate browser state. We run our detection system while browsing the web and observe a rich ecosystem, with over 500 unique trackers in our measurements alone. We find that most commercial pages are tracked by multiple parties, trackers vary widely in their coverage with a small number being widely deployed, and many trackers exhibit a combination of tracking behaviors. Based on web search traces taken from AOL data, we estimate that several trackers can each capture more than 20\% of a user's browsing behavior. We further assess the impact of defenses on tracking and find that no existing browser mechanisms prevent tracking by social media sites via widgets while still allowing those widgets to achieve their utility goals, which leads us to develop a new defense. To the best of our knowledge, our work is the most complete study of web tracking to date.},
series = {{{NSDI}}'12}
}
@ -1119,9 +1032,7 @@
editor = {Backes, Michael and Ning, Peng},
year = {2009},
pages = {86--103},
address = {{Berlin, Heidelberg}},
abstract = {This paper explores the problem of tracking information flow in dynamic tree structures. Motivated by the problem of manipulating the Document Object Model (DOM) trees by browser-run client-side scripts, we address the dynamic nature of interactions via tree structures. We present a runtime enforcement mechanism that monitors this interaction and prevents a range of attacks, some of them missed by previous approaches, that exploit the tree structure in order to transfer sensitive information. We formalize our approach for a simple language with DOM-like tree operations and show that the monitor prevents scripts from disclosing secrets.},
language = {en},
series = {Lecture {{Notes}} in {{Computer Science}}}
}
@ -1133,8 +1044,7 @@
volume = {87},
pages = {101569},
abstract = {Several past measurement studies uncovered various aspects of web-based tracking and its serious impact on user privacy. Most studies used institutional resources, e.g., computers hosted at well-known universities, or cloud-computing infrastructures such as Amazon EC2, confining the study to a particular geolocation or a few locations. Would there be any difference if web tracking is measured from actual user-owned residential machines? Does a user's geolocation affect web tracking? Past studies do not adequately answer these important questions, although web users come from across the globe, and tracking primarily targets home users. As a step forward, we leverage the Luminati proxy service to run a measurement study using residential machines from 56 countries. We rely on the OpenWPM web privacy measurement framework to analyze third-party scripts and cookies in 2050 distinct URLs (Alexa Top-1000 home pages and Alexa Top-50 country-specific home pages for all 56 countries, and shared URLs via Twitter from Alexa Top-1000 domains for 10 countries). Our findings reveal that the prevalence of web tracking varies across the globe. In addition to location, tracking also seems to depend on factors such as data privacy policies, Internet speed and censorship. We also observe that despite legal efforts for strengthening privacy, such as the EU cookie law, violations are common and very blatant in some cases, highlighting the need for more effective tools and frameworks for compliance monitoring and enforcement.},
journal = {Computers \& Security},
language = {en}
journal = {Computers \& Security}
}
@inproceedings{sanchez-rolaBakingTimerPrivacyAnalysis2019,
@ -1145,7 +1055,6 @@
year = {2019},
month = dec,
pages = {478--488},
address = {{San Juan, Puerto Rico}},
abstract = {Cookies were originally introduced as a way to provide state awareness to websites, and are now one of the backbones of the current web. However, their use is not limited to store the login information or to save the current state of user browsing. In several cases, third-party cookies are deliberately used for web tracking, user analytics, and for online advertisement, with the subsequent privacy loss for the end users. However, cookies are not the only technique capable of retrieving the users' browsing history. In fact, history sniffing techniques are capable of tracking the users' browsing history without relying on any specific code in a third-party website, but only on code executed within the visited site. Many sniffing techniques have been proposed to date, but they usually have several limitations and they are not able to differentiate between multiple possible states within the target application. In this paper we propose BakingTimer, a new history sniffing technique based on timing the execution of server-side request processing code. This method is capable of retrieving partial or complete user browsing history, it does not require any permission, and it can be performed through both first and third-party scripts. We studied the impact of our timing side-channel attack to detect prior visits to websites, and discovered that it was capable of detecting the users state in more than half of the 10K websites analyzed, which is the largest test performed to date to test this type of techniques. We additionally performed a manual analysis to check the capabilities of the attack to differentiate between three states: never accessed, accessed and logged in. Moreover, we performed a set of stability tests, to verify that our time measurements are robust with respect to changes both in the network RTT and in the servers workload.},
series = {{{ACSAC}} '19}
}
@ -1158,7 +1067,6 @@
year = {2019},
month = jul,
pages = {340--351},
address = {{Auckland, New Zealand}},
abstract = {The European Union's (EU) General Data Protection Regulation (GDPR), in effect since May 2018, enforces strict limitations on handling users' personal data, hence impacting their activity tracking on the Web. In this study, we perform an evaluation of the tracking performed in 2,000 high-traffic websites, hosted both inside and outside of the EU. We evaluate both the information presented to users and the actual tracking implemented through cookies; we find that the GDPR has impacted website behavior in a truly global way, both directly and indirectly: USA-based websites behave similarly to EU-based ones, while third-party opt-out services reduce the amount of tracking even for websites which do not put any effort in respecting the new law. On the other hand, we find that tracking remains ubiquitous. In particular, we found cookies that can identify users when visiting more than 90\% of the websites in our dataset - and we also encountered a large number of websites that present deceiving information, making it very difficult, if at all possible, for users to avoid being tracked.},
series = {Asia {{CCS}} '19}
}
@ -1173,7 +1081,6 @@
pages = {18--29},
abstract = {Web tracking is a commonly-used practice on the Internet devoted to retrieve user information for activities such as personalization or advertisement. These tec},
journal = {Log J IGPL},
language = {English},
number = {1}
}
@ -1187,8 +1094,7 @@
urldate = {2019-08-14},
abstract = {We perform a large-scale analysis of third-party trackers on the World Wide Web. We extract third-party embeddings from more than 3.5 billion web pages of the CommonCrawl 2012 corpus, and aggregate those to a dataset containing more than 140 million third-party embeddings in over 41 million domains. To the best of our knowledge, this constitutes the largest empirical web tracking dataset collected so far, and exceeds related studies by more than an order of magnitude in the number of domains and web pages analyzed.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@misc{SilverlightEndSupport2015,
@ -1220,22 +1126,7 @@
urldate = {2019-08-14},
abstract = {Websites are constantly adapting the methods used, and intensity with which they track online visitors. However, the wide-range enforcement of GDPR since one year ago (May 2018) forced websites serving EU-based online visitors to eliminate or at least reduce such tracking activity, given they receive proper user consent. Therefore, it is important to record and analyze the evolution of this tracking activity and assess the overall ``privacy health'' of the Web ecosystem and if it is better after GDPR enforcement. This work makes a significant step towards this direction. In this paper, we analyze the online ecosystem of 3rd-parties embedded in top websites which amass the majority of online tracking through 6 time snapshots taken every few months apart, in the duration of the last 2 years. We perform this analysis in three ways: 1) by looking into the network activity that 3rd-parties impose on each publisher hosting them, 2) by constructing a bipartite graph of ``publisher-to-tracker'', connecting 3rd parties with their publishers, 3) by constructing a ``tracker-to-tracker'' graph connecting 3rd-parties who are commonly found in publishers. We record significant changes through time in number of trackers, traffic induced in publishers (incoming vs. outgoing), embeddedness of trackers in publishers, popularity and mixture of trackers across publishers. We also report how such measures compare with the ranking of publishers based on Alexa. On the last level of our analysis, we dig deeper and look into the connectivity of trackers with each other and how this relates to potential cookie synchronization activity.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
}
@article{solomosTalonAutomatedFramework2018,
title = {Talon: {{An Automated Framework}} for {{Cross}}-{{Device Tracking Detection}}},
shorttitle = {Talon},
author = {Solomos, Konstantinos and Ilia, Panagiotis and Ioannidis, Sotiris and Kourtellis, Nicolas},
year = {2018},
month = dec,
url = {http://arxiv.org/abs/1812.11393},
urldate = {2019-08-14},
abstract = {Although digital advertising fuels much of today's free Web, it typically does so at the cost of online users' privacy, due to the continuous tracking and leakage of users' personal data. In search for new ways to optimize the effectiveness of ads, advertisers have introduced new advanced paradigms such as cross-device tracking (CDT), to monitor users' browsing on multiple devices and screens, and deliver (re)targeted ads in the most appropriate screen. Unfortunately, this practice leads to greater privacy concerns for the end-user.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@techreport{soltaniFlashCookiesPrivacy2009,
@ -1243,12 +1134,10 @@
author = {Soltani, Ashkan and Canty, Shannon and Mayo, Quentin and Thomas, Lauren and Hoofnagle, Chris Jay},
year = {2009},
month = aug,
address = {{Rochester, NY}},
institution = {{Social Science Research Network}},
url = {https://papers.ssrn.com/abstract=1446862},
urldate = {2020-02-13},
abstract = {This is a pilot study of the use of 'Flash cookies' by popular websites. We find that more than 50\% of the sites in our sample are using flash cookies to store information about the user. Some are using it to 'respawn' or re-instantiate HTTP cookies deleted by the user. Flash cookies often share the same values as HTTP cookies, and are even used on government websites to assign unique values to users. Privacy policies rarely disclose the presence of Flash cookies, and user controls for effectuating privacy preferences are lacking.},
language = {en},
number = {ID 1446862},
type = {{{SSRN Scholarly Paper}}}
}
@ -1262,8 +1151,7 @@
urldate = {2019-08-14},
abstract = {Third party tracking is the practice by which third parties recognize users across different websites as they browse the web. Recent studies show that 90\% of websites contain third party content that is tracking its users across the web. Website developers often need to include third party content in order to provide basic functionality. However, when a developer includes a third party content, she cannot know whether the third party contains tracking mechanisms. If a website developer wants to protect her users from being tracked, the only solution is to exclude any third-party content, thus trading functionality for privacy.},
archiveprefix = {arXiv},
eprintclass = {cs},
language = {English}
eprintclass = {cs}
}
@misc{soudersAnnouncingHTTPArchive2011,
@ -1286,7 +1174,6 @@
volume = {2016},
pages = {20--33},
journal = {PoPETs},
language = {English},
number = {1}
}
@ -1298,7 +1185,6 @@
volume = {2019},
pages = {255--266},
journal = {PoPETs},
language = {English},
number = {3}
}
@ -1309,7 +1195,6 @@
year = {2018},
month = dec,
pages = {289--299},
address = {{San Juan, PR, USA}},
abstract = {User tracking on the Internet can come in various forms, e.g., via cookies or by fingerprinting web browsers. A technique that got less attention so far is user tracking based on TLS and specifically based on the TLS session resumption mechanism. To the best of our knowledge, we are the first that investigate the applicability of TLS session resumption for user tracking. For that, we evaluated the configuration of 48 popular browsers and one million of the most popular websites. Moreover, we present a so-called prolongation attack, which allows extending the tracking period beyond the lifetime of the session resumption mechanism. To show that under the observed browser configurations tracking via TLS session resumptions is feasible, we also looked into DNS data to understand the longest consecutive tracking period for a user by a particular website. Our results indicate that with the standard setting of the session resumption lifetime in many current browsers, the average user can be tracked for up to eight days. With a session resumption lifetime of seven days, as recommended upper limit in the draft for TLS version 1.3, 65\% of all users in our dataset can be tracked permanently.},
series = {{{ACSAC}} '18}
}
@ -1321,7 +1206,6 @@
year = {2018},
month = apr,
pages = {85--86},
address = {{Lyon, France}},
abstract = {Over the last decade, the number of devices per person has increased substantially. This poses a challenge for cookie-based personalization applications, such as online search and advertising, as it narrows the personalization signal to a single device environment. A key task is to find which cookies belong to the same person to recover a complete cross-device user journey. Recent work on the topic has shown the benefits of using unsupervised embeddings learned on user event sequences. In this paper, we extend this approach to a supervised setting and introduce the Siamese Cookie Embedding Network (SCEmNet), a siamese convolutional architecture that leverages the multi-modal aspect of sequences, and show significant improvement over the state-of-the-art.},
series = {{{WWW}} '18}
}
@ -1335,7 +1219,6 @@
volume = {2019},
pages = {126--145},
journal = {Proceedings on Privacy Enhancing Technologies},
language = {en},
number = {2}
}
@ -1361,7 +1244,6 @@
year = {2015},
month = oct,
pages = {1382--1393},
address = {{Denver, Colorado, USA}},
abstract = {Web-based timing attacks have been known for over a decade, and it has been shown that, under optimal network conditions, an adversary can use such an attack to obtain information on the state of a user in a cross-origin website. In recent years, desktop computers have given way to laptops and mobile devices, which are mostly connected over a wireless or mobile network. These connections often do not meet the optimal conditions that are required to reliably perform cross-site timing attacks. In this paper, we show that modern browsers expose new side-channels that can be used to acquire accurate timing measurements, regardless of network conditions. Using several real-world examples, we introduce four novel web-based timing attacks against modern browsers and describe how an attacker can use them to obtain personal information based on a user's state on a cross-origin website. We evaluate our proposed attacks and demonstrate that they significantly outperform current attacks in terms of speed, reliability, and accuracy. Furthermore, we show that the nature of our attacks renders traditional defenses, i.e., those based on randomly delaying responses, moot and discuss possible server-side defense mechanisms.},
series = {{{CCS}} '15}
}
@ -1374,7 +1256,6 @@
volume = {2019},
pages = {227--244},
journal = {PoPETs},
language = {English},
number = {1}
}
@ -1397,8 +1278,7 @@
urldate = {2020-02-09},
abstract = {This specification defines the Document Object Model Level 1, a platform- and language-neutral interface that allows programs and scripts to dynamically access and update the content, structure and style of documents. The Document Object Model provides a standard set of objects for representing HTML and XML documents, a standard model of how these objects can be combined, and a standard interface for accessing and manipulating them. Vendors can support the DOM as an interface to their proprietary data structures and APIs, and content authors can write to the standard DOM interfaces rather than product-specific APIs, thus increasing interoperability on the Web.
The goal of the DOM specification is to define a programmatic interface for XML and HTML. The DOM Level 1 specification is separated into two parts: Core and HTML. The Core DOM Level 1 section provides a low-level set of fundamental interfaces that can represent any structured document, as well as defining extended interfaces for representing an XML document. These extended XML interfaces need not be implemented by a DOM implementation that only provides access to HTML documents; all of the fundamental interfaces in the Core section must be implemented. A compliant DOM implementation that implements the extended XML interfaces is required to also implement the fundamental Core interfaces, but not the HTML interfaces. The HTML Level 1 section provides additional, higher-level interfaces that are used with the fundamental interfaces defined in the Core Level 1 section to provide a more convenient view of an HTML document. A compliant implementation of the HTML DOM implements all of the fundamental Core interfaces as well as the HTML interfaces.},
language = {en}
The goal of the DOM specification is to define a programmatic interface for XML and HTML. The DOM Level 1 specification is separated into two parts: Core and HTML. The Core DOM Level 1 section provides a low-level set of fundamental interfaces that can represent any structured document, as well as defining extended interfaces for representing an XML document. These extended XML interfaces need not be implemented by a DOM implementation that only provides access to HTML documents; all of the fundamental interfaces in the Core section must be implemented. A compliant DOM implementation that implements the extended XML interfaces is required to also implement the fundamental Core interfaces, but not the HTML interfaces. The HTML Level 1 section provides additional, higher-level interfaces that are used with the fundamental interfaces defined in the Core Level 1 section to provide a more convenient view of an HTML document. A compliant implementation of the HTML DOM implements all of the fundamental Core interfaces as well as the HTML interfaces.}
}
@misc{w3techsHistoricalYearlyTrends2020,
@ -1427,7 +1307,6 @@ The goal of the DOM specification is to define a programmatic interface for XML
year = {2018},
month = jul,
pages = {3},
address = {{Montreal, QC, Canada}},
abstract = {While the Transport Layer Security (TLS) protocol is typically used to authenticate servers, it also offers the possibility to use Client Certificates to authenticate clients (CCA). We investigate the use of CCA based on two specific concerns: First, CCA is prone to being used in a context that encodes personal data into client certificates, such as identifying persons, e.g. in voting systems or VPN applications. Second, in versions prior to TLS1.3, the client certificate (as well as the server certificate) is being sent in clear text, permitting systematic and large-scale eavesdropping. Based on these two concerns, we investigate the use of CCA at an ISP uplink. Besides confirming our two concerns by finding, e.g., person names in VPN certificates, we also identify the Apple Push Notification Service (APNs) to leverage TLS CCA to identify client devices. We consider this use highly critical as APNs is an integral part of all Apple operating systems, and APNs establishes a connection immediately upon connecting the device to a network. We show that these properties can be used by various attacker types to track devices (and hence, likely users) with great precision across the global Internet. This work was published in 2017, with the TLS1.3 standardization still ongoing, and we aimed to emphasize the necessity of encrypting client certificates in the TLS handshake, which was adopted in the TLS1.3 standard. Based on work published at TMA'17 [1]. [1] Matthias Wachs, Quirin Scheitle, Georg Carle. 2017. Push Away Your Privacy: Precise User Tracking Based on TLS Client Certificate Authentication. In Proceedings of the 2017 Network Traffic Measurement and Analysis Conference (TMA '17)},
series = {{{ANRW}} '18}
}
@ -1499,7 +1378,6 @@ The goal of the DOM specification is to define a programmatic interface for XML
year = {2016},
month = apr,
pages = {121--132},
address = {{Montr\'eal, Qu\'ebec, Canada}},
abstract = {Online tracking poses a serious privacy challenge that has drawn significant attention in both academia and industry. Existing approaches for preventing user tracking, based on curated blocklists, suffer from limited coverage and coarse-grained resolution for classification, rely on exceptions that impact sites' functionality and appearance, and require significant manual maintenance. In this paper we propose a novel approach, based on the concepts leveraged from \$k\$-Anonymity, in which users collectively identify unsafe data elements, which have the potential to identify uniquely an individual user, and remove them from requests. We deployed our system to 200,000 German users running the Cliqz Browser or the Cliqz Firefox extension to evaluate its efficiency and feasibility. Results indicate that our approach achieves better privacy protection than blocklists, as provided by Disconnect, while keeping the site breakage to a minimum, even lower than the community-optimized AdBlock Plus. We also provide evidence of the prevalence and reach of trackers to over 21 million pages of 350,000 unique sites, the largest scale empirical evaluation to date. 95\% of the pages visited contain 3rd party requests to potential trackers and 78\% attempt to transfer unsafe data. Tracker organizations are also ranked, showing that a single organization can reach up to 42\% of all page visits in Germany.},
series = {{{WWW}} '16}
}
@ -1512,8 +1390,7 @@ The goal of the DOM specification is to define a programmatic interface for XML
url = {https://www.sitepen.com/blog/windowname-transport/},
urldate = {2020-02-10},
abstract = {The window.name transport is a new technique for secure cross-domain browser based data transfer, and can be utilized for creating secure mashups with untrusted sources. window.name is implemented in Dojo in the new dojox.io.windowName module, and it is very easy to make web services available through the window.name protocol. window.name works by loading a cross-domain \ldots},
journal = {SitePen},
language = {en-US}
journal = {SitePen}
}