Add Explainability section

Tobias Eidelpes 2021-12-17 18:25:12 +01:00
parent 8db9ea21f2
commit 708dbc4d49
2 changed files with 54 additions and 1 deletion


@@ -152,6 +152,23 @@
file = {/home/zenon/Zotero/storage/FZVU8FXW/Mehrabi et al. - 2021 - A Survey on Bias and Fairness in Machine Learning.pdf}
}
@inproceedings{ribeiroWhyShouldTrust2016,
title = {"{{Why Should I Trust You}}?": {{Explaining}} the {{Predictions}} of {{Any Classifier}}},
shorttitle = {"{{Why Should I Trust You}}?"},
booktitle = {Proceedings of the 22nd {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
date = {2016-08-13},
series = {{{KDD}} '16},
pages = {1135--1144},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/2939672.2939778},
abstract = {Despite widespread adoption, machine learning models remain mostly black boxes. Understanding the reasons behind predictions is, however, quite important in assessing trust, which is fundamental if one plans to take action based on a prediction, or when choosing whether to deploy a new model. Such understanding also provides insights into the model, which can be used to transform an untrustworthy model or prediction into a trustworthy one. In this work, we propose LIME, a novel explanation technique that explains the predictions of any classifier in an interpretable and faithful manner, by learning an interpretable model locally around the prediction. We also propose a method to explain models by presenting representative individual predictions and their explanations in a non-redundant way, framing the task as a submodular optimization problem. We demonstrate the flexibility of these methods by explaining different models for text (e.g. random forests) and image classification (e.g. neural networks). We show the utility of explanations via novel experiments, both simulated and with human subjects, on various scenarios that require trust: deciding if one should trust a prediction, choosing between models, improving an untrustworthy classifier, and identifying why a classifier should not be trusted.},
isbn = {978-1-4503-4232-2},
keywords = {black box classifier,explaining machine learning,interpretability,interpretable machine learning},
file = {/home/zenon/Zotero/storage/5F5BJIAT/Ribeiro et al. - 2016 - Why Should I Trust You Explaining the Predicti.pdf}
}
@online{roseFaceDetectionCamerasGlitches2010,
title = {Face-{{Detection Cameras}}: {{Glitches Spur Charges}} of {{Racism}}},
author = {Rose, Adam},


@@ -331,7 +331,43 @@ ever-increasing model complexity, made possible by massive deep neural networks
(DNNs) and other similarly complex architectures. Due to their size, models are
treated as black boxes with no apparent way to know how a particular prediction
came to be. This lack of explainability prevents humans from trusting artificial
intelligence systems, especially in critical areas such as medicine. To counter
the trend towards hard-to-understand artificial intelligence systems, a new
research field called \emph{eXplainable Artificial Intelligence} (XAI) has
emerged.

Scholars distinguish between two similar but slightly different terms:
\emph{explainability} and \emph{interpretability}. Interpretable systems allow
humans to \emph{look inside} the model to determine which predictions it will
make. This is only possible if most or all parameters of the model are visible
to an observer and changes to those parameters result in predictable changes in
the outputs. Explainability, on the other hand, applies to black-box systems
such as deep neural networks, where the system explains its predictions after
the fact.
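
To make the first notion concrete, the following sketch (a hypothetical toy
example in Python, not taken from the cited literature) fits a plain linear
regression model and reads its parameters directly; because every coefficient
is visible, the effect of a change in the inputs on the prediction is known in
advance.
\begin{verbatim}
# Minimal sketch of an interpretable model: all parameters are visible,
# and changes to the inputs lead to predictable changes in the output.
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))                       # three synthetic features
y = 2.0 * X[:, 0] - X[:, 1] + 0.5 * X[:, 2]         # known linear relationship

model = LinearRegression().fit(X, y)
print(model.coef_)                                  # the visible parameters

# Increasing feature 0 by one unit changes the prediction by coef_[0].
x = X[:1].copy()
x_shifted = x.copy()
x_shifted[0, 0] += 1.0
print(model.predict(x_shifted) - model.predict(x))  # approximately coef_[0]
\end{verbatim}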

The definition of interpretability already provides one possibility for
explainable models. If the model is constructed in a way that makes the
parameters visible and a decision can be traced from a starting point to the
outcome, the model is inherently explainable. Examples are decision trees,
linear regression models, rule-based models, and Bayesian networks. This
approach is not possible for neural networks, and thus \emph{model-agnostic
explanations} have to be found. \textsc{LIME} \cite{ribeiroWhyShouldTrust2016}
is a tool to find such model-agnostic explanations. \textsc{LIME} works
\enquote{…by learning an interpretable model locally around the prediction}
\cite[p.~1]{ribeiroWhyShouldTrust2016}. An advantage of this approach is that
\textsc{LIME} can be applied to any model, regardless of how it is constructed.
Because model\nobreakdash-agnostic explanations are so flexible, they can even
be used for already interpretable models such as random forest classifiers.
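
The core idea behind such local, model-agnostic explanations can be sketched in
a few lines of Python (a simplified illustration in the spirit of
\textsc{LIME}, not its actual implementation): the black-box model is queried
on random perturbations around a single instance, and a weighted linear
surrogate is fitted to those predictions; the surrogate's coefficients then act
as the local explanation.
\begin{verbatim}
# Simplified sketch of a local, model-agnostic explanation: fit an
# interpretable (linear) surrogate around one black-box prediction.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X, y = make_classification(n_samples=500, n_features=5, random_state=0)
black_box = RandomForestClassifier(random_state=0).fit(X, y)

def explain_locally(model, x, n_samples=1000, kernel_width=1.0):
    """Approximate `model` around `x` with a weighted linear surrogate."""
    # 1. Perturb the instance with Gaussian noise.
    Z = x + rng.normal(scale=0.5, size=(n_samples, x.shape[0]))
    # 2. Query the black box on the perturbed samples.
    preds = model.predict_proba(Z)[:, 1]
    # 3. Weight samples by their proximity to the original instance.
    weights = np.exp(-np.sum((Z - x) ** 2, axis=1) / kernel_width ** 2)
    # 4. Fit the interpretable surrogate locally around the prediction.
    surrogate = Ridge(alpha=1.0).fit(Z, preds, sample_weight=weights)
    return surrogate.coef_   # per-feature local importance

print(explain_locally(black_box, X[0]))
\end{verbatim}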

Deep neural networks can also be explained using either a \emph{gradient-based}
or a \emph{perturbation-based} explanation algorithm. Gradient-based algorithms
evaluate how much the outputs change when the inputs are modified. If the
gradient for a set of inputs is large, those inputs have a large effect on the
outputs. Conversely, a small gradient indicates that a change in the inputs
barely affects the outputs. Perturbation-based explanations work by finding
perturbations of the inputs that alter the model's predictions the most.
\textsc{LIME} is an example of a perturbation-based explanation algorithm.
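
A toy sketch in Python contrasts the two ideas (illustrative only; the function
f below stands in for a trained network's output and is an assumption of this
example): the gradient-based score estimates how strongly the output reacts to
each input, while the perturbation-based score measures how much the prediction
changes when an input is occluded.
\begin{verbatim}
# Toy contrast of gradient-based and perturbation-based attributions;
# f is a stand-in for a trained network's scalar output.
import numpy as np

def f(x):
    return np.tanh(2.0 * x[0] - 0.5 * x[1] + 0.1 * x[2])

def gradient_attribution(f, x, eps=1e-5):
    """Central-difference gradient: large entries move the output strongly."""
    grad = np.zeros_like(x)
    for i in range(len(x)):
        step = np.zeros_like(x)
        step[i] = eps
        grad[i] = (f(x + step) - f(x - step)) / (2 * eps)
    return grad

def perturbation_attribution(f, x):
    """Change in the output when each input is occluded (set to zero)."""
    base = f(x)
    scores = np.zeros_like(x)
    for i in range(len(x)):
        x_pert = x.copy()
        x_pert[i] = 0.0
        scores[i] = abs(base - f(x_pert))
    return scores

x = np.array([0.3, -0.7, 1.2])
print(gradient_attribution(f, x))
print(perturbation_attribution(f, x))
\end{verbatim}
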
\subsection{Privacy}