Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

components.py 1.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
  1. '''
  2. Principal Components Analysis - Optimal Number of Components
  3. '''
  4. import sys
  5. from sklearn.decomposition import PCA
  6. from sklearn.preprocessing import StandardScaler
  7. import matplotlib.pyplot as plt
  8. import numpy as np
  9. import pandas as pd
  10. from sklearn.preprocessing import scale
  11. import seaborn as sns
  12. def set_trace():
  13. """A Poor mans break point"""
  14. # without this in iPython debugger can generate strange characters.
  15. from IPython.core.debugger import Pdb
  16. Pdb().set_trace(sys._getframe().f_back)
  17. # Choosing number of Principal Components
  18. def graph_pca(x):
  19. x_std = StandardScaler().fit_transform(x)
  20. var_exp_list = []
  21. max_comp = 750
  22. components = range(1, max_comp)
  23. for component in components:
  24. model = PCA(n_components=component)
  25. x_reduced = model.fit_transform(x_std)
  26. explained_var = model.explained_variance_ratio_.cumsum()[-1]
  27. var_exp_list.append(explained_var)
  28. df = pd.DataFrame(var_exp_list, columns=['Variance Explained'])
  29. df['Components'] = range(1, max_comp)
  30. sns.lmplot('Components', 'Variance Explained', data=df, fit_reg=False)
  31. plt.savefig('C:\Thesis111217\CriminalClassifier\Outputs\pca_graph.png')
  32. pca_opt = next(i for i, v in enumerate(var_exp_list) if v > 1)
  33. return pca_opt
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...