1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
- import os
- import random
- from functools import partial
- import numpy as np
- import pandas as pd
- from sklearn.tree import DecisionTreeRegressor
- from imodels import FIGSClassifier, FIGSRegressor, FIGSClassifierCV, FIGSRegressorCV
- from imodels.experimental.figs_ensembles import FIGSExtRegressor, FIGSExtClassifier
- from sklearn.ensemble import StackingRegressor, VotingRegressor, BaggingClassifier
- path_to_tests = os.path.dirname(os.path.realpath(__file__))
- class TestFIGS:
- def setup_method(self):
- '''Test on synthetic dataset
- '''
- np.random.seed(13)
- random.seed(13)
- self.n = 100
- self.p = 2
- self.X = (np.random.randn(self.n, self.p) > 0).astype(int)
- # y = x0 > 0 * x1 > 0
- self.y_classification_binary = (self.X[:, 0] > 0).astype(int) * (
- self.X[:, 1] > 0).astype(int)
- self.y_reg = self.X[:, 0] + self.X[:, 1]
- def test_recognized_by_sklearn(self):
- base_models = [('figs', FIGSRegressor()),
- ('random_forest', DecisionTreeRegressor())]
- comb_model = VotingRegressor(estimators=base_models,
- n_jobs=10,
- verbose=2)
- comb_model.fit(self.X, self.y_reg)
- # def test_categorical(self):
- # """Test FIGS with categorical data"""
- # categories = ['cat', 'dog', 'bird', 'fish']
- # categories_2 = ['bear', 'chicken', 'cow']
- # self.X_cat = pd.DataFrame(self.X)
- # self.X_cat['pet1'] = np.random.choice(categories, size=(self.n, 1))
- # self.X_cat['pet2'] = np.random.choice(categories_2, size=(self.n, 1))
- # figs_reg = FIGSRegressor()
- # figs_cls = FIGSClassifier()
- # figs_reg.fit(self.X_cat, self.y_reg,
- # categorical_features=["pet1", 'pet2'])
- # figs_reg.predict(self.X_cat, categorical_features=["pet1", 'pet2'])
- # figs_cls.fit(self.X_cat, self.y_reg,
- # categorical_features=["pet1", 'pet2'])
- # figs_cls.predict_proba(
- # self.X_cat, categorical_features=["pet1", 'pet2'])
- def test_fitting(self):
- '''Test on a real (small) dataset
- '''
- for model_type in [
- FIGSClassifier, FIGSRegressor,
- FIGSExtClassifier, FIGSExtRegressor,
- FIGSClassifierCV, FIGSRegressorCV,
- partial(BaggingClassifier,
- estimator=FIGSExtClassifier(max_rules=3),
- n_estimators=2),
- ]:
- init_kwargs = {}
- m = model_type(**init_kwargs)
- X = self.X
- m.fit(X, self.y_classification_binary)
- # test predict()
- preds = m.predict(X) # > 0.5).astype(int)
- assert preds.size == self.n, 'predict() yields right size'
- # test preds_proba()
- if model_type in [FIGSClassifier, FIGSClassifierCV, BaggingClassifier]:
- preds_proba = m.predict_proba(X)
- assert len(preds_proba.shape) == 2, 'preds_proba has 2 columns'
- assert preds_proba.shape[1] == 2, 'preds_proba has 2 columns'
- assert np.max(
- preds_proba) < 1.1, 'preds_proba has no values over 1'
- assert (np.argmax(preds_proba, axis=1) == preds).all(), ("predict_proba and "
- "predict correspond")
- # test acc
- acc_train = np.mean(preds == self.y_classification_binary)
- assert acc_train > 0.8, 'acc greater than 0.9'
- # print(m)
- if not type(m) in [FIGSClassifierCV, FIGSRegressorCV, BaggingClassifier]:
- trees = m.trees_
- assert len(trees) == 1, 'only one tree'
- assert trees[0].feature == 1, 'split on feat 1'
- assert np.abs(trees[0].left.value) < 0.01, 'left value 0'
- assert trees[0].left.left is None and trees[0].left.right is None, 'left is leaf'
- assert np.abs(
- trees[0].right.left.value) < 0.01, 'right-left value 0'
- assert np.abs(trees[0].right.right.value -
- 1) < 0.01, 'right-right value 1'
- if __name__ == '__main__':
- t = TestFIGS()
- t.setup_method()
- t.test_fitting()
- t.test_categorical()
|