Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

ensemble.py 4.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  1. from sklearn.base import BaseEstimator, RegressorMixin, clone
  2. from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
  3. import numpy as np
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.metrics import mean_squared_error
  6. from sklearn.tree import DecisionTreeRegressor
  7. from sklearn.linear_model import LinearRegression
  8. import imodels
  9. import imodels.algebraic.gam_multitask
  10. class ResidualBoostingRegressor(BaseEstimator, RegressorMixin):
  11. def __init__(self, estimator, n_estimators=10):
  12. """
  13. A meta-estimator that fits a base estimator to the residuals of the
  14. previous estimators.
  15. Parameters:
  16. - estimator: The estimator to fit on the residual of the previous step.
  17. - n_estimators: The number of estimators to fit.
  18. """
  19. self.estimator = estimator
  20. self.n_estimators = n_estimators
  21. def fit(self, X, y):
  22. """
  23. Fit the ensemble of base estimators on the training data.
  24. Parameters:
  25. - X: array-like of shape (n_samples, n_features)
  26. Training data.
  27. - y: array-like of shape (n_samples,)
  28. Target values.
  29. Returns:
  30. - self: object
  31. """
  32. # Check that X and y have correct shape
  33. X, y = check_X_y(X, y)
  34. self.estimators_ = []
  35. current_prediction = np.zeros(y.shape)
  36. for _ in range(self.n_estimators):
  37. residual = y - current_prediction
  38. estimator = clone(self.estimator)
  39. estimator.fit(X, residual)
  40. self.estimators_.append(estimator)
  41. current_prediction += estimator.predict(X)
  42. return self
  43. def predict(self, X):
  44. """
  45. Predict regression target for X.
  46. Parameters:
  47. - X: array-like of shape (n_samples, n_features)
  48. The input samples.
  49. Returns:
  50. - y_pred: ndarray of shape (n_samples,)
  51. The predicted values.
  52. """
  53. # Check is fit had been called
  54. check_is_fitted(self)
  55. # Input validation
  56. X = check_array(X)
  57. predictions = sum(estimator.predict(X)
  58. for estimator in self.estimators_)
  59. return predictions
  60. class SimpleBaggingRegressor:
  61. def __init__(self, estimator, n_estimators=10, random_state=None):
  62. self.estimator = estimator
  63. self.n_estimators = n_estimators
  64. self.random_state = random_state
  65. def fit(self, X, y):
  66. np.random.seed(self.random_state)
  67. self.estimators_ = []
  68. rng = np.random.default_rng(self.random_state)
  69. for _ in range(self.n_estimators):
  70. # Simple bootstrap sampling
  71. # sample_indices = np.random.choice(
  72. # range(X.shape[0]), size=X.shape[0], replace=True)
  73. sample_indices = rng.choice(
  74. range(X.shape[0]), size=X.shape[0], replace=True)
  75. X_sample = X[sample_indices]
  76. y_sample = y[sample_indices]
  77. # Fit a base estimator
  78. # estimator = DecisionTreeRegressor()
  79. estimator = clone(self.estimator)
  80. estimator.fit(X_sample, y_sample)
  81. self.estimators_.append(estimator)
  82. def predict(self, X):
  83. # Collect predictions from each base estimator
  84. predictions = np.array([estimator.predict(X)
  85. for estimator in self.estimators_])
  86. # Aggregate predictions
  87. return np.mean(predictions, axis=0)
  88. if __name__ == '__main__':
  89. X, y, feature_names = imodels.get_clean_dataset('california_housing')
  90. X_train, X_test, y_train, y_test = train_test_split(
  91. X, y, test_size=0.2, random_state=42)
  92. X_train = X_train[:50, :2]
  93. y_train = y_train[:50]
  94. X_test = X_test[:50, :2]
  95. y_test = y_test[:50]
  96. # estimator = DecisionTreeRegressor(max_depth=3)
  97. estimator = imodels.algebraic.gam_multitask.MultiTaskGAMRegressor()
  98. for n_estimators in [1, 3, 5]:
  99. # residual_boosting_regressor = ResidualBoostingRegressor(
  100. # estimator=estimator, n_estimators=n_estimators)
  101. residual_boosting_regressor = SimpleBaggingRegressor(
  102. estimator=estimator, n_estimators=n_estimators)
  103. residual_boosting_regressor.fit(X_train, y_train)
  104. y_pred = residual_boosting_regressor.predict(X_test)
  105. mse_train = mean_squared_error(
  106. y_train, residual_boosting_regressor.predict(X_train))
  107. mse = mean_squared_error(y_test, y_pred)
  108. print(
  109. f'MSE with {n_estimators} estimators: {mse:.2f} (train: {mse_train:.2f})')
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...