1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
- import numpy as np
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
- from sklearn.tree import _tree
- from typing import Union, List, Tuple
def tree_to_rules(tree: Union[DecisionTreeClassifier, DecisionTreeRegressor],
                  feature_names: List[str],
                  prediction_values: bool = False, round_thresholds=True) -> List[str]:
    """
    Turn every root-to-leaf path of a fitted tree into a rule string

    Parameters
    ----------
    tree : Decision Tree Classifier/Regressor
        only its ``tree_`` attribute is read
    feature_names : list of variable names
        indexed with the feature ids stored in ``tree_.feature``
    prediction_values : bool
        if True, each entry is a ``(rule, value)`` tuple, where ``value`` is
        ``tree_.value[leaf][0]`` converted to a list
    round_thresholds : bool
        if True, split thresholds are rounded to 5 decimals

    Returns
    -------
    rules : list with one entry per leaf, left branches first. Conditions
        along a path are joined with ' and '.
    """
    # TODO: check the case where the tree is built on a subset of features,
    # i.e. max_features != None
    fitted = tree.tree_
    split_names = [
        "undefined!" if idx == _tree.TREE_UNDEFINED else feature_names[idx]
        for idx in fitted.feature
    ]
    template = "{} {} {}"
    rules = []

    # Depth-first walk with an explicit stack. The right child is pushed
    # first so that the left subtree is always expanded first.
    pending = [(0, [])]
    while pending:
        node, conditions = pending.pop()

        if fitted.feature[node] == _tree.TREE_UNDEFINED:
            # Leaf: the accumulated conditions form one rule
            rule = ' and '.join(conditions)
            if rule == '':
                # a rule selecting everything is written as "c0 == c0"
                rule = ' == '.join([feature_names[0]] * 2)
            if prediction_values:
                rules.append((rule, fitted.value[node][0].tolist()))
            else:
                rules.append(rule)
            continue

        name = split_names[node]
        threshold = fitted.threshold[node]
        if round_thresholds:
            threshold = np.round(threshold, decimals=5)

        went_left = conditions + [template.format(name, '<=', threshold)]
        went_right = conditions + [template.format(name, '>', threshold)]
        pending.append((fitted.children_right[node], went_right))
        pending.append((fitted.children_left[node], went_left))

    return rules if rules else 'True'
def tree_to_code(clf, feature_names):
    """
    Return the split condition(s) of a fitted tree as a colored string

    Intended for a tree with a single split: the conditions of all split
    nodes are concatenated in node-id order with no separator.

    Parameters
    ----------
    clf : fitted estimator; only ``clf.tree_`` is read (``node_count``,
        ``children_left``, ``children_right``, ``feature``, ``threshold``)
    feature_names : list of variable names, indexed by the ids in
        ``tree_.feature``

    Returns
    -------
    str : ``'<name> <= <threshold>'`` for each split node, wrapped in ANSI
        color escape codes and terminated by a newline. Nothing is printed.
    """
    tree_ = clf.tree_
    children_left = tree_.children_left
    children_right = tree_.children_right
    feature = tree_.feature
    threshold = tree_.threshold

    # A node whose left and right child ids differ is a split node; no
    # traversal or depth bookkeeping is needed to tell leaves apart.
    conditions = [
        f"{feature_names[feature[i]]} <= {threshold[i]}"
        for i in range(tree_.node_count)
        if children_left[i] != children_right[i]
    ]
    s = ''.join(conditions)
    return f"\033[96m{s}\033[00m\n"
def itemsets_to_rules(itemsets: List[Tuple]) -> List[str]:
    """
    Convert itemsets of discretized feature names into rule strings

    An itemset equal to the string 'null', or whose concatenated items
    contain 'All', is dropped. Every item of a remaining itemset is converted
    with `single_discretized_feature_to_rule` and the results are joined with
    ' and '.
    """
    kept = [
        itemset for itemset in itemsets
        if itemset != 'null' and 'All' not in ''.join(itemset)
    ]
    return [
        ' and '.join([single_discretized_feature_to_rule(item) for item in itemset])
        for itemset in kept
    ]
def dict_to_rule(rule, clf_feature_dict):
    """
    Convert a rule given as a list of condition dicts into a rule string

    Parameters
    ----------
    rule: list of dict of schema
        [
            {
                'feature': int,
                'operator': str,
                'pivot': float
            },
        ]
    clf_feature_dict: lookup from integer feature id to feature name
        (indexed as ``clf_feature_dict[int(condition['feature'])]``)

    Returns
    -------
    str : each condition formatted as '<name> <operator> <pivot>', joined
        with ' and '; the empty string when `rule` is empty
    """
    return ' and '.join(
        '{} {} {}'.format(
            clf_feature_dict[int(condition['feature'])],
            condition['operator'],
            condition['pivot'],
        )
        for condition in rule
    )
def single_discretized_feature_to_rule(feat: str) -> str:
    """
    Convert one discretized feature name into a rule string

    A name without '_to_' is treated as a categorical indicator and becomes
    '<feat> > 0.5'. Otherwise the name is read as '<name>_<lower>_to_<upper>'
    and becomes a bound on <name>: only an upper bound when <lower> is
    '-inf', only a lower bound when <upper> is 'inf', both otherwise.
    """
    separator = '_to_'

    # categorical feature
    if separator not in feat:
        return f'{feat} > 0.5'

    # discretized numeric feature: the bounds are the last two
    # '_to_'-separated pieces, the lower bound being the final '_'-separated
    # token of its piece
    *_, lower_piece, upper = feat.split(separator)
    lower = lower_piece.split('_')[-1]

    # strip the trailing '_<lower>_to_<upper>' to recover the feature name
    suffix_len = len('_') + len(lower) + len(separator) + len(upper)
    name = feat[:-suffix_len]

    if lower == '-inf':
        return f'{name} <= {upper}'
    if upper == 'inf':
        return f'{name} > {lower}'
    return f'{name} > {lower} and {name} <= {upper}'
|