1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
import os
import unittest

import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
from sklearn.model_selection import train_test_split

from imodels.rule_list.bayesian_rule_list.bayesian_rule_list import BayesianRuleListClassifier
from imodels.discretization import ExtraBasicDiscretizer

# Absolute directory of this test file; used to locate on-disk test fixtures
# (e.g. test_data/diabetes.arff in the commented-out integration test below).
path_to_tests = os.path.dirname(os.path.realpath(__file__))
class TestBRL(unittest.TestCase):
    """Integration tests for BayesianRuleListClassifier."""

    def test_integration_stability(self):
        """Test on synthetic dataset.

        Fits a rule list on a tiny, cleanly separable binary dataset and
        checks that per-row predictions reproduce the training labels exactly.
        """
        X = np.array([[0, 0, 1, 1, 0],
                      [1, 0, 0, 0, 0],
                      [0, 0, 1, 0, 0],
                      [1, 0, 0, 0, 0],
                      [1, 1, 0, 1, 1],
                      [1, 1, 1, 1, 1],
                      [0, 1, 1, 1, 1],
                      [1, 0, 1, 1, 1]])
        y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        M = BayesianRuleListClassifier(minsupport=0.02, maxcardinality=1)
        feat = ['ft1', 'ft2', 'ft3', 'ft4', 'ft5']
        M.fit(X, y, feature_names=feat)
        # Predict each row individually (one-sample batches) and require an
        # exact match with the training labels.
        preds = np.array([M.predict(np.array([row]), threshold=0.5)
                          for row in X]).flatten()
        assert (preds == y).all()

    # def test_integration_fitting(self):
    #     '''Test on a real (small) dataset
    #     '''
    #     np.random.seed(13)
    #     feature_names = ["#Pregnant", "Glucose concentration test", "Blood pressure(mmHg)",
    #                      "Triceps skin fold thickness(mm)",
    #                      "2-Hour serum insulin (mu U/ml)", "Body mass index", "Diabetes pedigree function",
    #                      "Age (years)"]
    #     data = loadarff(os.path.join(path_to_tests, "test_data/diabetes.arff"))
    #     data_np = np.array(list(map(lambda x: np.array(list(x)), data[0])))
    #     X, y_text = data_np[:, :-1].astype('float32'), data_np[:, -1].astype('str')
    #     y = (y_text == 'tested_positive').astype(int)  # labels 0-1
    #     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8)  # split
    #     disc = ExtraBasicDiscretizer(feature_names, n_bins=3, strategy='uniform')
    #     X_train_disc = disc.fit_transform(pd.DataFrame(X_train, columns=feature_names))
    #     X_test_disc = disc.transform(pd.DataFrame(X_test, columns=feature_names))
    #     # train classifier (allow more iterations for better accuracy; use BigDataRuleListClassifier for large datasets)
    #     print('training...')
    #     model = BayesianRuleListClassifier(max_iter=300, minsupport=0.4, maxcardinality=1, class1label="diabetes",
    #                                        verbose=False)
    #     model.fit(X_train_disc.values, y_train, feature_names=X_train_disc.columns)
    #     preds = model.predict(X_test_disc.values, threshold=0.1)
    #     print("RuleListClassifier Accuracy:", np.mean(y_test == preds), "Learned interpretable model:\n", model)
|