Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

tree_interaction_utils.py 3.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  1. import itertools
  2. from typing import Set, Tuple
  3. import numpy as np
  4. import pandas as pd
  5. def make_rj(n=300, p=50):
  6. """Generates data according to the model in Radchenko & James, 2010
  7. X_i ~ Unif([0,1]^p)
  8. y = sqrt(0.5)[sum_{i=1}^5 f_i(x) + f_1(x)f_2(x) + f_1(x)f_3(x)] + N(0,1)
  9. f_1(x) = x1, f_2(x) = (1+x2)^{-1}, f_3(x) = sin(x3), f_4(x) = e^x4, f_5(x) = x5^2
  10. function withing the sum are normalized
  11. Params
  12. ------
  13. n (int): number of sample
  14. p (int): number of features
  15. Returns
  16. -------
  17. Tuple[np.array, np.array]: design matrix and label vector
  18. """
  19. X = np.random.uniform(0, 1, size=(n, p))
  20. f_1 = X[:, 0]
  21. f_2 = (1 + X[:, 1]) ** (-1)
  22. f_3 = np.sin(X[:, 2])
  23. f_4 = np.exp(X[:, 3])
  24. f_5 = X[:, 4] ** (2)
  25. def _normalize_vec(v):
  26. return (v - np.mean(v)) / np.std(v)
  27. effects = _normalize_vec(f_1) + _normalize_vec(f_2) + _normalize_vec(f_3) + _normalize_vec(f_4) + _normalize_vec(
  28. f_5)
  29. interactions = f_1 * f_2 + f_1 * f_3
  30. y = effects + interactions + np.random.normal(size=n)
  31. return X, y
  32. def make_vp(n=100, p=100):
  33. """Generates data according to https://arxiv.org/abs/1607.02670 (Sparse additive Gaussian process with soft interactions)
  34. X_i ~ N(0, I)
  35. y = x1 + x2^2 + x3 + x4^2 + x5 + x1x2 + x2x3 + x3x4 + N(0, 0.14)
  36. Args:
  37. n (int): number of sample
  38. p (int): number of features
  39. Returns:
  40. Tuple[np.array, np.array]: design matrix and label vector
  41. """
  42. X = np.random.normal(size=(n, p))
  43. effects = X[:, 0] + X[:, 1] ** 2 + X[:, 2] + X[:, 3] ** 2 + X[:, 4]
  44. interactions = X[:, 0] * X[:, 1] + X[:, 1] * X[:, 2] + X[:, 2] * X[:, 3]
  45. y = effects + interactions + np.random.normal(scale=0.14, size=n)
  46. return X, y
  47. def get_gt(dataset_name):
  48. important_features = []
  49. interactions = []
  50. if dataset_name == "friedman1":
  51. important_features = [0, 1, 2, 3, 4]
  52. interactions = [(0, 1)]
  53. elif dataset_name == "radchenko_james":
  54. important_features = [0, 1, 2, 3, 4]
  55. interactions = [(0, 1), (0, 2)]
  56. elif dataset_name == "vo_pati":
  57. important_features = [0, 1, 2, 3, 4]
  58. interactions = [(0, 1), (1, 2), (2, 3)]
  59. return set(important_features), set(interactions)
  60. def get_important_features(importance, k):
  61. return set(np.argsort(importance)[0:k])
  62. def get_interacting_features(interaction, k):
  63. scores_list = []
  64. for ind_1, ind_2 in itertools.combinations(range(interaction.shape[0]), 2):
  65. scores_list.append([interaction[ind_1, ind_2], ind_1, ind_2])
  66. df = pd.DataFrame(scores_list)
  67. df = df.sort_values(0, ascending=False)
  68. interactions = []
  69. for i in range(k):
  70. interactions.append((df.iloc[i, 1], df.iloc[i, 2]))
  71. return set(interactions)
  72. def interaction_fpr(i_gt: Set[Tuple], i_hat: Set[Tuple], p: int):
  73. if len(i_gt) == 0:
  74. return
  75. n_pairs = 0.5 * p * (p - 1)
  76. n_non_interacting_pairs = (n_pairs - len(i_gt))
  77. return len(i_hat.difference(i_gt)) / n_non_interacting_pairs
  78. def interaction_tpr(i_gt: Set[Tuple], i_hat: Set[Tuple], p: int):
  79. if len(i_gt) == 0:
  80. return
  81. n_interactions = len(i_gt)
  82. return len(i_hat.intersection(i_gt)) / n_interactions
  83. def interaction_f1(i_gt: Set[Tuple], i_hat: Set[Tuple], p: int):
  84. if len(i_gt) == 0:
  85. return
  86. recall = len(i_gt.intersection(i_hat)) / len(i_gt)
  87. precision = interaction_tpr(i_hat, i_gt, p)
  88. if recall + precision == 0:
  89. return 0
  90. return 2 * ((precision * recall) / (precision + recall))
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...