Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

rule.py 3.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  1. import copy
  2. import re
  3. from collections import OrderedDict
  4. from typing import Dict, Iterable
  5. class Rule:
  6. """ An object modeling a logical rule and add factorization methods.
  7. It is used to simplify rules and deduplicate them.
  8. Parameters
  9. ----------
  10. rule : str
  11. The logical rule that is interpretable by a pandas query.
  12. args : object, optional
  13. Arguments associated to the rule, it is not used for factorization
  14. but it takes part of the output when the rule is converted to an array.
  15. """
  16. def __init__(self, rule, args=None, support=None):
  17. self.rule = rule
  18. self.args = args
  19. self.support = support
  20. self.terms = [t.split(' ') for t in self.rule.split(' and ')]
  21. self.agg_dict = {}
  22. self.factorize()
  23. self.rule = str(self)
  24. def __eq__(self, other):
  25. return self.agg_dict == other.agg_dict
  26. def __hash__(self):
  27. # FIXME : Easier method ?
  28. return hash(tuple(sorted(((i, j) for i, j in self.agg_dict.items()))))
  29. def factorize(self) -> None:
  30. for feature, symbol, value in self.terms:
  31. if (feature, symbol) not in self.agg_dict:
  32. if symbol != '==':
  33. self.agg_dict[(feature, symbol)] = str(float(value))
  34. else:
  35. self.agg_dict[(feature, symbol)] = value
  36. else:
  37. if symbol[0] == '<':
  38. self.agg_dict[(feature, symbol)] = str(min(
  39. float(self.agg_dict[(feature, symbol)]),
  40. float(value)))
  41. elif symbol[0] == '>':
  42. self.agg_dict[(feature, symbol)] = str(max(
  43. float(self.agg_dict[(feature, symbol)]),
  44. float(value)))
  45. else: # Handle the c0 == c0 case
  46. self.agg_dict[(feature, symbol)] = value
  47. def __iter__(self):
  48. yield str(self)
  49. yield self.args
  50. def __repr__(self):
  51. return ' and '.join([' '.join(
  52. [feature, symbol, str(self.agg_dict[(feature, symbol)])])
  53. for feature, symbol in sorted(self.agg_dict.keys())
  54. ])
  55. def replace_feature_name(rule: Rule, replace_dict: Dict[str, str]) -> Rule:
  56. def replace(match):
  57. return replace_dict[match.group(0)]
  58. rule_replaced = copy.copy(rule)
  59. rule_replaced.rule = re.sub('|'.join(r'\b%s\b' % re.escape(s) for s in replace_dict), replace, rule.rule)
  60. replaced_agg_dict = {}
  61. for feature, symbol in rule_replaced.agg_dict:
  62. replaced_agg_dict[(replace_dict[feature], symbol)] = rule_replaced.agg_dict[(feature, symbol)]
  63. rule_replaced.agg_dict = replaced_agg_dict
  64. return rule_replaced
  65. def get_feature_dict(num_features: int, feature_names: Iterable[str] = None) -> Dict[str, str]:
  66. feature_dict = OrderedDict()
  67. if feature_names is not None:
  68. for i in range(num_features):
  69. feature_dict[f'X_{i}'] = feature_names[i]
  70. else:
  71. for i in range(num_features):
  72. feature_dict[f'X_{i}'] = f'X_{i}'
  73. return feature_dict
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...