Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

feature_engineering.py 1.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  1. import os
  2. import pandas as pd
  3. import plac
  4. from sklearn.decomposition import PCA
  5. from src.utils import dump_yaml, read_data, read_yaml, save_results
  6. @plac.annotations(
  7. data_path=("Path to source data", "option", "i", str),
  8. feature_path=("Path to save featurized data", "option", "f", str),
  9. out_path=("Path to save pca model", "option", "o", str),
  10. )
  11. def main(
  12. data_path: str = "data/split/",
  13. feature_path: str = "data/features/",
  14. out_path: str = "models/pca/",
  15. ) -> None:
  16. X_train, X_test, y_train, y_test = read_data(data_path)
  17. params = read_yaml("params.yaml", "pca")
  18. pca = PCA(**params).fit(X_train)
  19. train_feature = pd.DataFrame(pca.transform(X_train))
  20. test_feature = pd.DataFrame(pca.transform(X_test))
  21. train_feature["class"] = y_train
  22. test_feature["class"] = y_test
  23. if not os.path.isdir(feature_path):
  24. os.mkdir(feature_path)
  25. train_feature.to_csv(f"{feature_path}train.csv", index=False)
  26. test_feature.to_csv(f"{feature_path}test.csv", index=False)
  27. save_results(out_path, pca, None)
  28. print("Finished Feature Engineering:\nStats:")
  29. print(f"\tExplained Variance: {pca.explained_variance_}")
  30. print(f"\tExplained Variance Ratio: {pca.explained_variance_ratio_}")
  31. dump_yaml(
  32. dict(
  33. explained_variance=pca.explained_variance_.tolist(),
  34. explained_variance_ratio=pca.explained_variance_ratio_.tolist(),
  35. ),
  36. "models/pca/metrics.yaml",
  37. )
  38. if __name__ == "__main__":
  39. plac.call(main)
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...