Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gosdt_tst.py 1.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  1. from imodels import OptimalTreeClassifier
  2. from imodels.util.data_util import get_clean_dataset
  3. DATASETS_CLASSIFICATION = [
  4. # classification datasets from original random forests paper
  5. # page 9: https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf
  6. # ("sonar", "sonar", "pmlb"),
  7. # ("heart", "heart", 'imodels'),
  8. # ("breast-cancer", "breast_cancer", 'imodels'),
  9. # ("haberman", "haberman", 'imodels'),
  10. ("ionosphere", "ionosphere", 'pmlb'),
  11. ("diabetes", "diabetes", "pmlb"),
  12. # # #("liver", "8", "openml"), # note: we omit this dataset bc it's label was found to be incorrect (see caveat here: https://archive.ics.uci.edu/ml/datasets/liver+disorders#:~:text=The%207th%20field%20(selector)%20has%20been%20widely%20misinterpreted%20in%20the%20past%20as%20a%20dependent%20variable%20representing%20presence%20or%20absence%20of%20a%20liver%20disorder.)
  13. # # #("credit-g", "credit_g", 'imodels'), # like german-credit, but more feats
  14. # ("german-credit", "german", "pmlb"),
  15. #
  16. # #clinical-decision rules
  17. # #("iai-pecarn", "iai_pecarn.csv", "imodels"),
  18. #
  19. # #popular classification datasets used in rule-based modeling / fairness
  20. # # page 7: http://proceedings.mlr.press/v97/wang19a/wang19a.pdf
  21. # ("juvenile", "juvenile_clean", 'imodels'),
  22. # ("recidivism", "compas_two_year_clean", 'imodels'),
  23. # # ("credit", "credit_card_clean", 'imodels'),
  24. # # ("readmission", 'readmission_clean', 'imodels'), # v big
  25. ]
  26. if __name__ == '__main__':
  27. for d in DATASETS_CLASSIFICATION:
  28. gosdt_cls = OptimalTreeClassifier()
  29. X, y, feat_names = get_clean_dataset(d[1], data_source=d[2])
  30. gosdt_cls.fit(X, y)
  31. pass
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...