Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

brl_test.py 2.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  1. import os
  2. import unittest
  3. import numpy as np
  4. import pandas as pd
  5. from scipy.io.arff import loadarff
  6. from sklearn.model_selection import train_test_split
  7. from imodels.rule_list.bayesian_rule_list.bayesian_rule_list import BayesianRuleListClassifier
  8. from imodels.discretization import ExtraBasicDiscretizer
  9. path_to_tests = os.path.dirname(os.path.realpath(__file__))
  10. class TestBRL(unittest.TestCase):
  11. def test_integration_stability(self):
  12. '''Test on synthetic dataset
  13. '''
  14. X = np.array([[0, 0, 1, 1, 0],
  15. [1, 0, 0, 0, 0],
  16. [0, 0, 1, 0, 0],
  17. [1, 0, 0, 0, 0],
  18. [1, 1, 0, 1, 1],
  19. [1, 1, 1, 1, 1],
  20. [0, 1, 1, 1, 1],
  21. [1, 0, 1, 1, 1]])
  22. y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
  23. M = BayesianRuleListClassifier(minsupport=0.02, maxcardinality=1)
  24. feat = ['ft1', 'ft2', 'ft3', 'ft4', 'ft5']
  25. M.fit(X, y, feature_names=feat)
  26. assert (np.array([M.predict(np.array([row]), threshold=0.5)
  27. for row in X]).flatten() == y).all()
  28. # def test_integration_fitting(self):
  29. # '''Test on a real (small) dataset
  30. # '''
  31. # np.random.seed(13)
  32. # feature_names = ["#Pregnant", "Glucose concentration test", "Blood pressure(mmHg)",
  33. # "Triceps skin fold thickness(mm)",
  34. # "2-Hour serum insulin (mu U/ml)", "Body mass index", "Diabetes pedigree function",
  35. # "Age (years)"]
  36. # data = loadarff(os.path.join(path_to_tests, "test_data/diabetes.arff"))
  37. # data_np = np.array(list(map(lambda x: np.array(list(x)), data[0])))
  38. # X, y_text = data_np[:, :-1].astype('float32'), data_np[:, -1].astype('str')
  39. # y = (y_text == 'tested_positive').astype(int) # labels 0-1
  40. # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8) # split
  41. # disc = ExtraBasicDiscretizer(feature_names, n_bins=3, strategy='uniform')
  42. # X_train_disc = disc.fit_transform(pd.DataFrame(X_train, columns=feature_names))
  43. # X_test_disc = disc.transform(pd.DataFrame(X_test, columns=feature_names))
  44. # # train classifier (allow more iterations for better accuracy; use BigDataRuleListClassifier for large datasets)
  45. # print('training...')
  46. # model = BayesianRuleListClassifier(max_iter=300, minsupport=0.4, maxcardinality=1, class1label="diabetes",
  47. # verbose=False)
  48. # model.fit(X_train_disc.values, y_train, feature_names=X_train_disc.columns)
  49. # preds = model.predict(X_test_disc.values, threshold=0.1)
  50. # print("RuleListClassifier Accuracy:", np.mean(y_test == preds), "Learned interpretable model:\n", model)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...