preprocess.py
import json

import cv2
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split


def read_data():
    """Load the raw FER2013 CSV into a DataFrame."""
    df = pd.read_csv('data/raw/fer2013.csv')
    return df


def standardize_data(df):
    """Save per-class counts and convert pixel strings into a (N, 48, 48) float32 array."""
    emotion_counts = df.emotion.value_counts()
    emotion_counts_dict = emotion_counts.to_dict()
    with open('./data/emotion_counts.json', 'w') as f:
        json.dump(emotion_counts_dict, f)
    # Standardize the data to make it compatible with the model:
    # each space-separated pixel string becomes a 48x48 float32 image.
    img_array = df.pixels.apply(
        lambda x: np.array(x.split(' ')).reshape(48, 48).astype('float32'))
    img_array = np.stack(img_array, axis=0)
    return img_array


def get_features(img_array):
    """Convert single-channel images to 3-channel RGB arrays."""
    img_features = []
    for i in range(len(img_array)):
        temp = cv2.cvtColor(img_array[i], cv2.COLOR_GRAY2RGB)
        img_features.append(temp)
    img_features = np.array(img_features)
    return img_features


def encode_features(df):
    """One-hot encode the emotion column (7 classes)."""
    labels = df['emotion'].values
    encoded_labels = to_categorical(labels, num_classes=7)
    return encoded_labels


def split_save_data(img_features, img_labels, df):
    """Split into stratified train/validation sets, normalize, and save as .npy files."""
    X_train, X_valid, y_train, y_valid = train_test_split(
        img_features,
        img_labels,
        shuffle=True,
        stratify=df.emotion,
        test_size=0.15,
        random_state=42)
    # Normalize pixel values to [0, 1] before saving
    X_train = X_train / 255.
    X_valid = X_valid / 255.
    # Save the arrays to disk as .npy files (a short loading sketch follows this listing)
    np.save('./data/processed/X_train.npy', X_train)
    np.save('./data/processed/X_valid.npy', X_valid)
    np.save('./data/processed/y_train.npy', y_train)
    np.save('./data/processed/y_valid.npy', y_valid)
    print("done.")


if __name__ == '__main__':
    # Refer to the main branch preprocess.py if you want to stream the data file
    # directly from the remote repo (a hedged sketch follows this listing).
    df = read_data()
    img_array = standardize_data(df)
    img_labels = encode_features(df)
    img_features = get_features(img_array)
    split_save_data(img_features, img_labels, df)
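The comment in the __main__ block points to a main-branch variant that streams the raw CSV from the remote repository instead of reading a local copy. That file is not shown here, so the following is only a minimal sketch of the idea using DVC's Python API; the read_data_streamed name, repo URL, and rev are placeholders, and the real main-branch code may differ.

    import pandas as pd
    import dvc.api

    def read_data_streamed():
        # Open the tracked CSV directly from the remote repository, so no local
        # checkout of data/raw/fer2013.csv is needed. The repo URL and rev are
        # placeholders, not the project's actual values.
        with dvc.api.open('data/raw/fer2013.csv',
                          repo='https://dagshub.com/<user>/<repo>',  # placeholder
                          rev='main') as f:
            return pd.read_csv(f)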
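For completeness, a downstream training script would typically load the four arrays that split_save_data() writes under data/processed/. The loader below is an assumption for illustration and is not part of this repository.

    import numpy as np

    def load_processed(processed_dir='./data/processed'):
        # Load the arrays written by split_save_data(); the paths mirror
        # the ones used in the listing above.
        X_train = np.load(f'{processed_dir}/X_train.npy')
        X_valid = np.load(f'{processed_dir}/X_valid.npy')
        y_train = np.load(f'{processed_dir}/y_train.npy')
        y_valid = np.load(f'{processed_dir}/y_valid.npy')
        return X_train, X_valid, y_train, y_valid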