% de Vos and Balvert (2023), CentER Discussion Paper 2023-015.
% The report number lives in `number` (standard techreport styles do not print
% `volume`), and `type` holds the printed label, so the style itself renders
% "CentER Discussion Paper 2023-015" without a hand-written note.
@techreport{70276b7f902646ada8e81c50918a7a17,
  author      = {de Vos, Wout and Balvert, Marleen},
  title       = {{RPA}: Learning Interpretable Input-Output Relationships by Counting Samples},
  type        = {{CentER} Discussion Paper},
  number      = {2023-015},
  institution = {CentER, Center for Economic Research},
  publisher   = {CentER, Center for Economic Research},
  series      = {CentER Discussion Paper},
  year        = {2023},
  month       = jul,
  day         = {10},
  language    = {English},
  keywords    = {interpretability, Binary classification, Boolean rules in DNF, Confidence intervals, Feasibility analysis},
  abstract    = {This work proposes a fast solution algorithm to a fundamental data science problem, namely to identify Boolean rules in disjunctive normal form (DNF) that classify samples based on binary features. The algorithm is an explainable machine learning method: it provides an explicit input-output relationship. It is based on hypothesis tests through confidence intervals, where the used test statistic requires nothing more than counting the number of cases and the number of controls that possess a certain feature or a set of features, reflecting the potential AND clauses of the Boolean phrase. Extensive experiments on simulated data demonstrate the algorithm{\textquoteright}s effectivity and efficiency. The efficiency of the algorithm relies on the fact that the bottleneck operation is a matrix multiplication of the input matrix with itself. More than only a solution algorithm, this paper offers a flexible and transparent theoretical framework with a statistical analysis of the problem and many entry points for future adjustments and improvements. Among other things, this framework allows one to assess the feasibility of identifying the input-output relationships given certain easily-obtained characteristics of the data.},
}