1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
- from copy import deepcopy
- from functools import partial
- import numpy as np
- import sklearn
- from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
- from sklearn.base import BaseEstimator, ClassifierMixin, MetaEstimatorMixin
- from sklearn.model_selection import train_test_split
- from sklearn.preprocessing import normalize
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
- from sklearn.utils.multiclass import check_classification_targets, unique_labels
- from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
- from imodels.rule_set.rule_set import RuleSet
- from imodels.rule_set.slipper_util import SlipperBaseEstimator
- from imodels.util.arguments import check_fit_arguments
- from imodels.util.convert import tree_to_code, tree_to_rules, dict_to_rule
- from imodels.util.rule import Rule, get_feature_dict, replace_feature_name
class BoostedRulesClassifier(AdaBoostClassifier):
    '''An easy-interpretable classifier optimizing simple logical rules.

    Thin wrapper around ``AdaBoostClassifier`` that accepts optional feature
    names in ``fit`` and records a ``complexity_`` attribute after fitting.

    Params
    ------
    estimator: object with fit and predict methods
        Defaults to DecisionTreeClassifier with AdaBoost.
        For SLIPPER, should pass estimator=imodels.SlipperBaseEstimator
    n_estimators: int
        Forwarded unchanged to AdaBoostClassifier.
    learning_rate: float
        Forwarded unchanged to AdaBoostClassifier.
    random_state: int, RandomState instance or None
        Forwarded unchanged to AdaBoostClassifier.
    '''

    def __init__(
        self,
        estimator=DecisionTreeClassifier(max_depth=1),
        *,
        n_estimators=15,
        learning_rate=1.0,
        random_state=None,
    ):
        try:  # sklearn version >= 1.2
            super().__init__(
                estimator=estimator,
                n_estimators=n_estimators,
                learning_rate=learning_rate,
                random_state=random_state,
            )
        except TypeError:  # sklearn version < 1.2
            # Older releases only know the `base_estimator` keyword, so the
            # call above fails with TypeError (unexpected keyword argument).
            # Catching just TypeError keeps unrelated errors visible.
            super().__init__(
                base_estimator=estimator,
                n_estimators=n_estimators,
                learning_rate=learning_rate,
                random_state=random_state,
            )
        # Always expose the constructor argument under its own name,
        # whichever keyword the parent class accepted.
        self.estimator = estimator

    def fit(self, X, y, feature_names=None, **kwargs):
        '''Fit the boosted ensemble and record its complexity.

        Params
        ------
        X, y: training data and targets
            Passed through check_fit_arguments before fitting.
        feature_names: optional
            Handed to check_fit_arguments together with X and y.
            NOTE(review): presumably stored on the estimator by that
            helper -- confirm against imodels.util.arguments.
        **kwargs:
            Forwarded to AdaBoostClassifier.fit (e.g. sample_weight).

        Returns
        -------
        self: the fitted estimator, so calls can be chained.
        '''
        X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
        super().fit(X, y, **kwargs)
        # One unit of complexity per fitted weak learner.
        self.complexity_ = len(self.estimators_)
        return self
class BoostedRulesRegressor(AdaBoostRegressor):
    '''An easy-interpretable regressor optimizing simple logical rules.

    Thin wrapper around ``AdaBoostRegressor`` that accepts optional feature
    names in ``fit`` and records a ``complexity_`` attribute after fitting.

    Params
    ------
    estimator: object with fit and predict methods
        Defaults to DecisionTreeRegressor with AdaBoost.
    n_estimators: int
        Forwarded unchanged to AdaBoostRegressor.
    learning_rate: float
        Forwarded unchanged to AdaBoostRegressor.
    random_state: int, RandomState instance or None
        Forwarded unchanged to AdaBoostRegressor. Defaults to 13.
    '''

    def __init__(
        self,
        estimator=DecisionTreeRegressor(max_depth=1),
        *,
        n_estimators=15,
        learning_rate=1.0,
        random_state=13,
    ):
        try:  # sklearn version >= 1.2
            super().__init__(
                estimator=estimator,
                n_estimators=n_estimators,
                learning_rate=learning_rate,
                random_state=random_state,
            )
        except TypeError:  # sklearn version < 1.2
            # Older releases only know the `base_estimator` keyword, so the
            # call above fails with TypeError (unexpected keyword argument).
            # Catching just TypeError keeps unrelated errors visible.
            super().__init__(
                base_estimator=estimator,
                n_estimators=n_estimators,
                learning_rate=learning_rate,
                random_state=random_state,
            )
        # Always expose the constructor argument under its own name,
        # whichever keyword the parent class accepted.
        self.estimator = estimator

    def fit(self, X, y, feature_names=None, **kwargs):
        '''Fit the boosted ensemble and record its complexity.

        Params
        ------
        X, y: training data and targets
            Passed through check_fit_arguments before fitting.
        feature_names: optional
            Handed to check_fit_arguments together with X and y.
            NOTE(review): presumably stored on the estimator by that
            helper -- confirm against imodels.util.arguments.
        **kwargs:
            Forwarded to AdaBoostRegressor.fit (e.g. sample_weight).

        Returns
        -------
        self: the fitted estimator, so calls can be chained.
        '''
        X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
        super().fit(X, y, **kwargs)
        # One unit of complexity per fitted weak learner.
        self.complexity_ = len(self.estimators_)
        return self
if __name__ == '__main__':
    # Small smoke-test / demo: fit the classifier on the breast-cancer data
    # and print held-out accuracy together with the model complexity.

    # Import the submodule explicitly; a bare `import sklearn` does not
    # guarantee that `sklearn.datasets` has been loaded.
    from sklearn.datasets import load_breast_cancer

    np.random.seed(13)
    X, Y = load_breast_cancer(as_frame=True, return_X_y=True)
    # The estimator must be an instance, not the class itself: AdaBoost
    # clones it for every boosting round and cloning a class fails.
    # Depth-1 stumps match the default used by BoostedRulesClassifier.
    model = BoostedRulesClassifier(estimator=DecisionTreeClassifier(max_depth=1))
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
    model.fit(X_train, y_train, feature_names=X_train.columns)
    y_pred = model.predict(X_test)
    acc = model.score(X_test, y_test)
    print('acc', acc, 'complexity', model.complexity_)
    print(model)
|