% trustworthy-ai/trustworthy-ai.bib

@online{angwinMachineBias2016,
title = {Machine {{Bias}}},
author = {Angwin, Julia and Larson, Jeff and Mattu, Surya and Kirchner, Lauren},
date = {2016-05-23},
url = {https://www.propublica.org/article/machine-bias-risk-assessments-in-criminal-sentencing},
urldate = {2021-12-16},
abstract = {There's software used across the country to predict future criminals. And it's biased against blacks.},
langid = {english},
organization = {{ProPublica}},
file = {/home/zenon/Zotero/storage/4ZVTSG7A/machine-bias-risk-assessments-in-criminal-sentencing.html}
}
@online{bellamyAIFairness3602018,
title = {{{AI Fairness}} 360: {{An Extensible Toolkit}} for {{Detecting}}, {{Understanding}}, and {{Mitigating Unwanted Algorithmic Bias}}},
shorttitle = {{{AI Fairness}} 360},
author = {Bellamy, Rachel K. E. and Dey, Kuntal and Hind, Michael and Hoffman, Samuel C. and Houde, Stephanie and Kannan, Kalapriya and Lohia, Pranay and Martino, Jacquelyn and Mehta, Sameep and Mojsilovic, Aleksandra and Nagar, Seema and Ramamurthy, Karthikeyan Natesan and Richards, John and Saha, Diptikalyan and Sattigeri, Prasanna and Singh, Moninder and Varshney, Kush R. and Zhang, Yunfeng},
date = {2018-10-03},
eprint = {1810.01943},
eprinttype = {arxiv},
primaryclass = {cs},
url = {http://arxiv.org/abs/1810.01943},
urldate = {2021-12-16},
abstract = {Fairness is an increasingly important concern as machine learning models are used to support decision making in high-stakes applications such as mortgage lending, hiring, and prison sentencing. This paper introduces a new open source Python toolkit for algorithmic fairness, AI Fairness 360 (AIF360), released under an Apache v2.0 license (https://github.com/ibm/aif360). The main objectives of this toolkit are to help facilitate the transition of fairness research algorithms to use in an industrial setting and to provide a common framework for fairness researchers to share and evaluate algorithms. The package includes a comprehensive set of fairness metrics for datasets and models, explanations for these metrics, and algorithms to mitigate bias in datasets and models. It also includes an interactive Web experience (https://aif360.mybluemix.net) that provides a gentle introduction to the concepts and capabilities for line-of-business users, as well as extensive documentation, usage guidance, and industry-specific tutorials to enable data scientists and practitioners to incorporate the most appropriate tool for their problem into their work products. The architecture of the package has been engineered to conform to a standard paradigm used in data science, thereby further improving usability for practitioners. Such architectural design and abstractions enable researchers and developers to extend the toolkit with their new algorithms and improvements, and to use it for performance benchmarking. A built-in testing infrastructure maintains code quality.},
archiveprefix = {arXiv},
version = {1},
keywords = {Computer Science - Artificial Intelligence},
file = {/home/zenon/Zotero/storage/QGE62G3L/Bellamy et al. - 2018 - AI Fairness 360 An Extensible Toolkit for Detecti.pdf;/home/zenon/Zotero/storage/ZD7NM9WG/1810.html}
}
@article{dustdarSocialComputeUnit2011,
title = {The {{Social Compute Unit}}},
author = {Dustdar, Schahram and Bhattacharya, Kamal},
date = {2011-05},
journaltitle = {IEEE Internet Computing},
volume = {15},
number = {3},
pages = {64--69},
issn = {1941-0131},
doi = {10.1109/MIC.2011.68},
abstract = {Social computing is perceived mainly as a vehicle for establishing and maintaining private relationships and thus lacks mainstream adoption in enterprises. Collaborative computing, however, is firmly established, but no tight integration of the two approaches exists. Here, the authors look at how to integrate people, in the form of human-based computing, and software services into one composite system.},
eventtitle = {{{IEEE Internet Computing}}},
keywords = {Collaboration,Online services,Privacy,service-oriented computing,social compute power,social compute unit,social computing,Social network services,workflow},
file = {/home/zenon/Zotero/storage/BRUJCIMC/Dustdar and Bhattacharya - 2011 - The Social Compute Unit.pdf;/home/zenon/Zotero/storage/IB8NK88P/5755601.html}
}
@online{europeancommissionEthicsGuidelinesTrustworthy,
title = {Ethics Guidelines for Trustworthy {{AI}} | {{Shaping Europe}}'s Digital Future},
author = {{European Commission}},
date = {2019-04-08},
url = {https://digital-strategy.ec.europa.eu/en/library/ethics-guidelines-trustworthy-ai},
urldate = {2021-12-13},
abstract = {On 8 April 2019, the High-Level Expert Group on AI presented Ethics Guidelines for Trustworthy Artificial Intelligence. This followed the publication of the guidelines' first draft in December 2018 on which more than 500 comments were received through an open consultation.},
langid = {english},
file = {/home/zenon/Zotero/storage/JG9TE5X8/ethics-guidelines-trustworthy-ai.html}
}
@article{ferrarioAIWeTrust2020,
title = {In {{AI We Trust Incrementally}}: A {{Multi-layer Model}} of {{Trust}} to {{Analyze Human-Artificial Intelligence Interactions}}},
shorttitle = {In {{AI We Trust Incrementally}}},
author = {Ferrario, Andrea and Loi, Michele and Viganò, Eleonora},
date = {2020-09-01},
journaltitle = {Philosophy \& Technology},
shortjournal = {Philos. Technol.},
volume = {33},
number = {3},
pages = {523--539},
issn = {2210-5441},
doi = {10.1007/s13347-019-00378-3},
abstract = {Real engines of the artificial intelligence (AI) revolution, machine learning (ML) models, and algorithms are embedded nowadays in many services and products around us. As a society, we argue it is now necessary to transition into a phronetic paradigm focused on the ethical dilemmas stemming from the conception and application of AIs to define actionable recommendations as well as normative solutions. However, both academic research and society-driven initiatives are still quite far from clearly defining a solid program of study and intervention. In this contribution, we will focus on selected ethical investigations around AI by proposing an incremental model of trust that can be applied to both human-human and human-AI interactions. Starting with a quick overview of the existing accounts of trust, with special attention to Taddeo's concept of “e-trust,” we will discuss all the components of the proposed model and the reasons to trust in human-AI interactions in an example of relevance for business organizations. We end this contribution with an analysis of the epistemic and pragmatic reasons of trust in human-AI interactions and with a discussion of kinds of normativity in trustworthiness of AIs.},
langid = {english},
file = {/home/zenon/Zotero/storage/TKPD5797/Ferrario et al. - 2020 - In AI We Trust Incrementally a Multi-layer Model .pdf}
}
@inproceedings{fredriksonModelInversionAttacks2015,
title = {Model {{Inversion Attacks}} That {{Exploit Confidence Information}} and {{Basic Countermeasures}}},
booktitle = {Proceedings of the 22nd {{ACM SIGSAC Conference}} on {{Computer}} and {{Communications Security}}},
author = {Fredrikson, Matt and Jha, Somesh and Ristenpart, Thomas},
date = {2015-10-12},
series = {{{CCS}} '15},
pages = {1322--1333},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/2810103.2813677},
abstract = {Machine-learning (ML) algorithms are increasingly utilized in privacy-sensitive applications such as predicting lifestyle choices, making medical diagnoses, and facial recognition. In a model inversion attack, recently introduced in a case study of linear classifiers in personalized medicine by Fredrikson et al., adversarial access to an ML model is abused to learn sensitive genomic information about individuals. Whether model inversion attacks apply to settings outside theirs, however, is unknown. We develop a new class of model inversion attack that exploits confidence values revealed along with predictions. Our new attacks are applicable in a variety of settings, and we explore two in depth: decision trees for lifestyle surveys as used on machine-learning-as-a-service systems and neural networks for facial recognition. In both cases confidence values are revealed to those with the ability to make prediction queries to models. We experimentally show attacks that are able to estimate whether a respondent in a lifestyle survey admitted to cheating on their significant other and, in the other context, show how to recover recognizable images of people's faces given only their name and access to the ML model. We also initiate experimental exploration of natural countermeasures, investigating a privacy-aware decision tree training algorithm that is a simple variant of CART learning, as well as revealing only rounded confidence values. The lesson that emerges is that one can avoid these kinds of MI attacks with negligible degradation to utility.},
isbn = {978-1-4503-3832-5},
keywords = {attacks,machine learning,privacy},
file = {/home/zenon/Zotero/storage/7TSTC9I6/Fredrikson et al. - 2015 - Model Inversion Attacks that Exploit Confidence In.pdf}
}
@article{friedmanBiasComputerSystems1996,
title = {Bias in Computer Systems},
author = {Friedman, Batya and Nissenbaum, Helen},
date = {1996-07-01},
journaltitle = {ACM Transactions on Information Systems},
shortjournal = {ACM Trans. Inf. Syst.},
volume = {14},
number = {3},
pages = {330--347},
issn = {1046-8188},
doi = {10.1145/230538.230561},
abstract = {From an analysis of actual cases, three categories of bias in computer systems have been developed: preexisting, technical, and emergent. Preexisting bias has its roots in social institutions, practices, and attitudes. Technical bias arises from technical constraints or considerations. Emergent bias arises in a context of use. Although others have pointed to bias in particular computer systems and have noted the general problem, we know of no comparable work that examines this phenomenon comprehensively and which offers a framework for understanding and remedying it. We conclude by suggesting that freedom from bias should be counted among the select set of criteria—including reliability, accuracy, and efficiency—according to which the quality of systems in use in society should be judged.},
keywords = {bias,computer ethics,computers and society,design methods,ethics,human values,social computing,social impact,standards,system design,universal design,values},
file = {/home/zenon/Zotero/storage/SSN9KLVR/Friedman and Nissenbaum - 1996 - Bias in computer systems.pdf}
}
@online{hintonDistillingKnowledgeNeural2015,
title = {Distilling the {{Knowledge}} in a {{Neural Network}}},
author = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
date = {2015-03-09},
eprint = {1503.02531},
eprinttype = {arxiv},
primaryclass = {cs, stat},
url = {http://arxiv.org/abs/1503.02531},
urldate = {2021-12-17},
abstract = {A very simple way to improve the performance of almost any machine learning algorithm is to train many different models on the same data and then to average their predictions. Unfortunately, making predictions using a whole ensemble of models is cumbersome and may be too computationally expensive to allow deployment to a large number of users, especially if the individual models are large neural nets. Caruana and his collaborators have shown that it is possible to compress the knowledge in an ensemble into a single model which is much easier to deploy and we develop this approach further using a different compression technique. We achieve some surprising results on MNIST and we show that we can significantly improve the acoustic model of a heavily used commercial system by distilling the knowledge in an ensemble of models into a single model. We also introduce a new type of ensemble composed of one or more full models and many specialist models which learn to distinguish fine-grained classes that the full models confuse. Unlike a mixture of experts, these specialist models can be trained rapidly and in parallel.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
file = {/home/zenon/Zotero/storage/W6UFKZ42/Hinton et al. - 2015 - Distilling the Knowledge in a Neural Network.pdf;/home/zenon/Zotero/storage/CK5D3KWG/1503.html}
}
@online{huMembershipInferenceAttacks2021,
title = {Membership {{Inference Attacks}} on {{Machine Learning}}: {{A Survey}}},
shorttitle = {Membership {{Inference Attacks}} on {{Machine Learning}}},
author = {Hu, Hongsheng and Salcic, Zoran and Sun, Lichao and Dobbie, Gillian and Yu, Philip S. and Zhang, Xuyun},
date = {2021-11-07},
eprint = {2103.07853},
eprinttype = {arxiv},
primaryclass = {cs},
url = {http://arxiv.org/abs/2103.07853},
urldate = {2021-12-17},
abstract = {Machine learning (ML) models have been widely applied to various applications, including image classification, text generation, audio recognition, and graph data analysis. However, recent studies have shown that ML models are vulnerable to membership inference attacks (MIAs), which aim to infer whether a data record was used to train a target model or not. MIAs on ML models can directly lead to a privacy breach. For example, via identifying the fact that a clinical record that has been used to train a model associated with a certain disease, an attacker can infer that the owner of the clinical record has the disease with a high chance. In recent years, MIAs have been shown to be effective on various ML models, e.g., classification models and generative models. Meanwhile, many defense methods have been proposed to mitigate MIAs. Although MIAs on ML models form a newly emerging and rapidly growing research area, there has been no systematic survey on this topic yet. In this paper, we conduct the first comprehensive survey on membership inference attacks and defenses. We provide the taxonomies for both attacks and defenses, based on their characterizations, and discuss their pros and cons. Based on the limitations and gaps identified in this survey, we point out several promising future research directions to inspire the researchers who wish to follow this area. This survey not only serves as a reference for the research community but also brings a clear picture to researchers outside this research domain. To further facilitate the researchers, we have created an online resource repository and keep updating it with the future relevant works. Interested readers can find the repository at https://github.com/HongshengHu/membership-inference-machine-learning-literature.},
archiveprefix = {arXiv},
version = {3},
keywords = {Computer Science - Cryptography and Security,Computer Science - Machine Learning},
file = {/home/zenon/Zotero/storage/CZXPXDZF/Hu et al. - 2021 - Membership Inference Attacks on Machine Learning .pdf;/home/zenon/Zotero/storage/B538X3B2/2103.html}
}
@article{lambrechtAlgorithmicBiasEmpirical2019,
title = {Algorithmic {{Bias}}? {{An Empirical Study}} of {{Apparent Gender-Based Discrimination}} in the {{Display}} of {{STEM Career Ads}}},
shorttitle = {Algorithmic {{Bias}}?},
author = {Lambrecht, Anja and Tucker, Catherine},
date = {2019-07-01},
journaltitle = {Management Science},
volume = {65},
number = {7},
pages = {2966--2981},
publisher = {{INFORMS}},
issn = {0025-1909},
doi = {10.1287/mnsc.2018.3093},
abstract = {We explore data from a field test of how an algorithm delivered ads promoting job opportunities in the science, technology, engineering and math fields. This ad was explicitly intended to be gender neutral in its delivery. Empirically, however, fewer women saw the ad than men. This happened because younger women are a prized demographic and are more expensive to show ads to. An algorithm that simply optimizes cost-effectiveness in ad delivery will deliver ads that were intended to be gender neutral in an apparently discriminatory way, because of crowding out. We show that this empirical regularity extends to other major digital platforms. This paper was accepted by Joshua Gans, business strategy.},
keywords = {algorithmic bias,algorithms,artificial intelligence,online advertising},
file = {/home/zenon/Zotero/storage/J79LR42T/Lambrecht and Tucker - 2019 - Algorithmic Bias An Empirical Study of Apparent G.pdf}
}
@online{liuTrustworthyAIComputational2021,
title = {Trustworthy {{AI}}: {{A Computational Perspective}}},
shorttitle = {Trustworthy {{AI}}},
author = {Liu, Haochen and Wang, Yiqi and Fan, Wenqi and Liu, Xiaorui and Li, Yaxin and Jain, Shaili and Liu, Yunhao and Jain, Anil K. and Tang, Jiliang},
date = {2021-08-18},
eprint = {2107.06641},
eprinttype = {arxiv},
primaryclass = {cs},
url = {http://arxiv.org/abs/2107.06641},
urldate = {2021-11-03},
abstract = {In the past few decades, artificial intelligence (AI) technology has experienced swift developments, changing everyone's daily life and profoundly altering the course of human society. The intention of developing AI is to benefit humans, by reducing human labor, bringing everyday convenience to human lives, and promoting social good. However, recent research and AI applications show that AI can cause unintentional harm to humans, such as making unreliable decisions in safety-critical scenarios or undermining fairness by inadvertently discriminating against one group. Thus, trustworthy AI has attracted immense attention recently, which requires careful consideration to avoid the adverse effects that AI may bring to humans, so that humans can fully trust and live in harmony with AI technologies. Recent years have witnessed a tremendous amount of research on trustworthy AI. In this survey, we present a comprehensive survey of trustworthy AI from a computational perspective, to help readers understand the latest technologies for achieving trustworthy AI. Trustworthy AI is a large and complex area, involving various dimensions. In this work, we focus on six of the most crucial dimensions in achieving trustworthy AI: (i) Safety \& Robustness, (ii) Non-discrimination \& Fairness, (iii) Explainability, (iv) Privacy, (v) Accountability \& Auditability, and (vi) Environmental Well-Being. For each dimension, we review the recent related technologies according to a taxonomy and summarize their applications in real-world systems. We also discuss the accordant and conflicting interactions among different dimensions and discuss potential aspects for trustworthy AI to investigate in the future.},
archiveprefix = {arXiv},
version = {3},
keywords = {Computer Science - Artificial Intelligence},
file = {/home/zenon/Zotero/storage/3SPRGW2M/Liu et al. - 2021 - Trustworthy AI A Computational Perspective.pdf;/home/zenon/Zotero/storage/8AUMUFD2/2107.html}
}
@online{madryDeepLearningModels2019,
title = {Towards {{Deep Learning Models Resistant}} to {{Adversarial Attacks}}},
author = {Madry, Aleksander and Makelov, Aleksandar and Schmidt, Ludwig and Tsipras, Dimitris and Vladu, Adrian},
date = {2019-09-04},
eprint = {1706.06083},
eprinttype = {arxiv},
primaryclass = {cs, stat},
url = {http://arxiv.org/abs/1706.06083},
urldate = {2021-12-16},
abstract = {Recent work has demonstrated that deep neural networks are vulnerable to adversarial examples---inputs that are almost indistinguishable from natural data and yet classified incorrectly by the network. In fact, some of the latest findings suggest that the existence of adversarial attacks may be an inherent weakness of deep learning models. To address this problem, we study the adversarial robustness of neural networks through the lens of robust optimization. This approach provides us with a broad and unifying view on much of the prior work on this topic. Its principled nature also enables us to identify methods for both training and attacking neural networks that are reliable and, in a certain sense, universal. In particular, they specify a concrete security guarantee that would protect against any adversary. These methods let us train networks with significantly improved resistance to a wide range of adversarial attacks. They also suggest the notion of security against a first-order adversary as a natural and broad security guarantee. We believe that robustness against such well-defined classes of adversaries is an important stepping stone towards fully resistant deep learning models. Code and pre-trained models are available at https://github.com/MadryLab/mnist\_challenge and https://github.com/MadryLab/cifar10\_challenge.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
file = {/home/zenon/Zotero/storage/6NVLSNAG/Madry et al. - 2019 - Towards Deep Learning Models Resistant to Adversar.pdf;/home/zenon/Zotero/storage/TBT64G7J/1706.html}
}
@article{mehrabiSurveyBiasFairness2021,
title = {A {{Survey}} on {{Bias}} and {{Fairness}} in {{Machine Learning}}},
author = {Mehrabi, Ninareh and Morstatter, Fred and Saxena, Nripsuta and Lerman, Kristina and Galstyan, Aram},
date = {2021-07-13},
journaltitle = {ACM Computing Surveys},
shortjournal = {ACM Comput. Surv.},
volume = {54},
number = {6},
pages = {115:1--115:35},
issn = {0360-0300},
doi = {10.1145/3457607},
abstract = {With the widespread use of artificial intelligence (AI) systems and applications in our everyday lives, accounting for fairness has gained significant importance in designing and engineering of such systems. AI systems can be used in many sensitive environments to make important and life-changing decisions; thus, it is crucial to ensure that these decisions do not reflect discriminatory behavior toward certain groups or populations. More recently some work has been developed in traditional machine learning and deep learning that address such challenges in different subdomains. With the commercialization of these systems, researchers are becoming more aware of the biases that these applications can contain and are attempting to address them. In this survey, we investigated different real-world applications that have shown biases in various ways, and we listed different sources of biases that can affect AI applications. We then created a taxonomy for fairness definitions that machine learning researchers have defined to avoid the existing bias in AI systems. In addition to that, we examined different domains and subdomains in AI showing what researchers have observed with regard to unfair outcomes in the state-of-the-art methods and ways they have tried to address them. There are still many future directions and solutions that can be taken to mitigate the problem of bias in AI systems. We are hoping that this survey will motivate researchers to tackle these issues in the near future by observing existing work in their respective fields.},
keywords = {deep learning,Fairness and bias in artificial intelligence,machine learning,natural language processing,representation learning},
file = {/home/zenon/Zotero/storage/FZVU8FXW/Mehrabi et al. - 2021 - A Survey on Bias and Fairness in Machine Learning.pdf}
}
@inproceedings{ribeiroWhyShouldTrust2016,
title = {"{{Why Should I Trust You}}?": {{Explaining}} the {{Predictions}} of {{Any Classifier}}},
shorttitle = {"{{Why Should I Trust You}}?"},
booktitle = {Proceedings of the 22nd {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
date = {2016-08-13},
series = {{{KDD}} '16},
pages = {1135--1144},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/2939672.2939778},
abstract = {Despite widespread adoption, machine learning models remain mostly black boxes. Understanding the reasons behind predictions is, however, quite important in assessing trust, which is fundamental if one plans to take action based on a prediction, or when choosing whether to deploy a new model. Such understanding also provides insights into the model, which can be used to transform an untrustworthy model or prediction into a trustworthy one. In this work, we propose LIME, a novel explanation technique that explains the predictions of any classifier in an interpretable and faithful manner, by learning an interpretable model locally around the prediction. We also propose a method to explain models by presenting representative individual predictions and their explanations in a non-redundant way, framing the task as a submodular optimization problem. We demonstrate the flexibility of these methods by explaining different models for text (e.g. random forests) and image classification (e.g. neural networks). We show the utility of explanations via novel experiments, both simulated and with human subjects, on various scenarios that require trust: deciding if one should trust a prediction, choosing between models, improving an untrustworthy classifier, and identifying why a classifier should not be trusted.},
isbn = {978-1-4503-4232-2},
keywords = {black box classifier,explaining machine learning,interpretability,interpretable machine learning},
file = {/home/zenon/Zotero/storage/5F5BJIAT/Ribeiro et al. - 2016 - Why Should I Trust You Explaining the Predicti.pdf}
}
@online{roseFaceDetectionCamerasGlitches2010,
title = {Face-{{Detection Cameras}}: {{Glitches Spur Charges}} of {{Racism}}},
author = {Rose, Adam},
date = {2010-01-22},
url = {https://content.time.com/time/business/article/0,8599,1954643,00.html},
urldate = {2021-12-16},
organization = {{TIME}},
file = {/home/zenon/Zotero/storage/7EKLAKR5/0,8599,1954643,00.html}
}
@book{russellArtificialIntelligenceModern2021,
title = {Artificial {{Intelligence}}: {{A Modern Approach}}, {{Global Edition}}},
shorttitle = {Artificial {{Intelligence}}},
author = {Russell, Stuart J. and Norvig, Peter},
date = {2021},
edition = {4},
publisher = {{Pearson}},
isbn = {978-0-13-461099-3},
file = {/home/zenon/Zotero/storage/LADUV26B/Russell and Norvig - 2021 - Artificial Intelligence A Modern Approach, Global.pdf}
}
@online{saleiroAequitasBiasFairness2019,
title = {Aequitas: {{A Bias}} and {{Fairness Audit Toolkit}}},
shorttitle = {Aequitas},
author = {Saleiro, Pedro and Kuester, Benedict and Hinkson, Loren and London, Jesse and Stevens, Abby and Anisfeld, Ari and Rodolfa, Kit T. and Ghani, Rayid},
date = {2019-04-29},
eprint = {1811.05577},
eprinttype = {arxiv},
primaryclass = {cs},
url = {http://arxiv.org/abs/1811.05577},
urldate = {2021-12-16},
abstract = {Recent work has raised concerns on the risk of unintended bias in AI systems being used nowadays that can affect individuals unfairly based on race, gender or religion, among other possible characteristics. While a lot of bias metrics and fairness definitions have been proposed in recent years, there is no consensus on which metric/definition should be used and there are very few available resources to operationalize them. Therefore, despite recent awareness, auditing for bias and fairness when developing and deploying AI systems is not yet a standard practice. We present Aequitas, an open source bias and fairness audit toolkit that is an intuitive and easy to use addition to the machine learning workflow, enabling users to seamlessly test models for several bias and fairness metrics in relation to multiple population sub-groups. Aequitas facilitates informed and equitable decisions around developing and deploying algorithmic decision making systems for both data scientists, machine learning researchers and policymakers.},
archiveprefix = {arXiv},
version = {2},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society,Computer Science - Machine Learning},
file = {/home/zenon/Zotero/storage/PSQU2E7G/Saleiro et al. - 2019 - Aequitas A Bias and Fairness Audit Toolkit.pdf;/home/zenon/Zotero/storage/JIBAJ9KD/1811.html}
}
@inproceedings{shokriMembershipInferenceAttacks2017,
title = {Membership {{Inference Attacks Against Machine Learning Models}}},
booktitle = {2017 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{SP}})},
author = {Shokri, Reza and Stronati, Marco and Song, Congzheng and Shmatikov, Vitaly},
date = {2017-05},
pages = {3--18},
issn = {2375-1207},
doi = {10.1109/SP.2017.41},
abstract = {We quantitatively investigate how machine learning models leak information about the individual data records on which they were trained. We focus on the basic membership inference attack: given a data record and black-box access to a model, determine if the record was in the model's training dataset. To perform membership inference against a target model, we make adversarial use of machine learning and train our own inference model to recognize differences in the target model's predictions on the inputs that it trained on versus the inputs that it did not train on. We empirically evaluate our inference techniques on classification models trained by commercial "machine learning as a service" providers such as Google and Amazon. Using realistic datasets and classification tasks, including a hospital discharge dataset whose membership is sensitive from the privacy perspective, we show that these models can be vulnerable to membership inference attacks. We then investigate the factors that influence this leakage and evaluate mitigation strategies.},
eventtitle = {2017 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{SP}})},
keywords = {Data models,Google,Predictive models,Privacy,Sociology,Statistics,Training},
file = {/home/zenon/Zotero/storage/KUJRX2H8/Shokri et al. - 2017 - Membership Inference Attacks Against Machine Learn.pdf;/home/zenon/Zotero/storage/I73BEWN3/7958568.html}
}
@article{suhTrustworthinessMobileCyberPhysical2021,
title = {Trustworthiness in {{Mobile Cyber-Physical Systems}}},
author = {Suh, Hyo-Joong and Son, Junggab and Kang, Kyungtae},
date = {2021-01},
journaltitle = {Applied Sciences},
volume = {11},
number = {4},
pages = {1676},
publisher = {{Multidisciplinary Digital Publishing Institute}},
doi = {10.3390/app11041676},
abstract = {As they continue to become faster and cheaper, devices with enhanced computing and communication capabilities are increasingly incorporated into diverse objects and structures in the physical environment [...]},
issue = {4},
langid = {english},
file = {/home/zenon/Zotero/storage/EQDGFNC4/Suh et al. - 2021 - Trustworthiness in Mobile Cyber-Physical Systems.pdf;/home/zenon/Zotero/storage/798R34VM/1676.html}
}
@online{tsiprasRobustnessMayBe2019,
title = {Robustness {{May Be}} at {{Odds}} with {{Accuracy}}},
author = {Tsipras, Dimitris and Santurkar, Shibani and Engstrom, Logan and Turner, Alexander and Madry, Aleksander},
date = {2019-09-09},
eprint = {1805.12152},
eprinttype = {arxiv},
primaryclass = {cs, stat},
url = {http://arxiv.org/abs/1805.12152},
urldate = {2021-12-16},
abstract = {We show that there may exist an inherent tension between the goal of adversarial robustness and that of standard generalization. Specifically, training robust models may not only be more resource-consuming, but also lead to a reduction of standard accuracy. We demonstrate that this trade-off between the standard accuracy of a model and its robustness to adversarial perturbations provably exists in a fairly simple and natural setting. These findings also corroborate a similar phenomenon observed empirically in more complex settings. Further, we argue that this phenomenon is a consequence of robust classifiers learning fundamentally different feature representations than standard classifiers. These differences, in particular, seem to result in unexpected benefits: the representations learned by robust models tend to align better with salient data characteristics and human perception.},
archiveprefix = {arXiv},
version = {5},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
file = {/home/zenon/Zotero/storage/VWTMWIEK/Tsipras et al. - 2019 - Robustness May Be at Odds with Accuracy.pdf;/home/zenon/Zotero/storage/DG7EDYAM/1805.html}
}
@article{xuAdversarialAttacksDefenses2020,
title = {Adversarial {{Attacks}} and {{Defenses}} in {{Images}}, {{Graphs}} and {{Text}}: {{A Review}}},
shorttitle = {Adversarial {{Attacks}} and {{Defenses}} in {{Images}}, {{Graphs}} and {{Text}}},
author = {Xu, Han and Ma, Yao and Liu, Hao-Chen and Deb, Debayan and Liu, Hui and Tang, Ji-Liang and Jain, Anil K.},
date = {2020-04-01},
journaltitle = {International Journal of Automation and Computing},
shortjournal = {Int. J. Autom. Comput.},
volume = {17},
number = {2},
pages = {151--178},
issn = {1751-8520},
doi = {10.1007/s11633-019-1211-x},
abstract = {Deep neural networks (DNN) have achieved unprecedented success in numerous machine learning tasks in various domains. However, the existence of adversarial examples raises our concerns in adopting deep learning to safety-critical applications. As a result, we have witnessed increasing interests in studying attack and defense mechanisms for DNN models on different data types, such as images, graphs and text. Thus, it is necessary to provide a systematic and comprehensive overview of the main threats of attacks and the success of corresponding countermeasures. In this survey, we review the state of the art algorithms for generating adversarial examples and the countermeasures against adversarial examples, for three most popular data types, including images, graphs and text.},
langid = {english},
file = {/home/zenon/Zotero/storage/LWZNKZLR/Xu et al. - 2020 - Adversarial Attacks and Defenses in Images, Graphs.pdf}
}