preprocess.py
import json

import cv2
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split


def read_data():
    """Load the raw FER2013 CSV into a DataFrame."""
    df = pd.read_csv('data/raw/fer2013.csv')
    return df


def standardize_data(df):
    """Save per-class counts and convert pixel strings into a (N, 48, 48) float32 array."""
    emotion_counts = df.emotion.value_counts()
    emotion_counts_dict = emotion_counts.to_dict()
    with open('./data/emotion_counts.json', 'w') as f:
        json.dump(emotion_counts_dict, f)
    # Standardize the data to make it compatible with the model:
    # each space-separated pixel string becomes a 48x48 float32 image.
    img_array = df.pixels.apply(
        lambda x: np.array(x.split(' ')).reshape(48, 48).astype('float32'))
    img_array = np.stack(img_array, axis=0)
    return img_array


def get_features(img_array):
    """Convert single-channel images to 3-channel RGB arrays."""
    img_features = []
    for i in range(len(img_array)):
        temp = cv2.cvtColor(img_array[i], cv2.COLOR_GRAY2RGB)
        img_features.append(temp)
    img_features = np.array(img_features)
    return img_features


def encode_features(df):
    """One-hot encode the emotion column (7 classes)."""
    labels = df['emotion'].values
    encoded_labels = to_categorical(labels, num_classes=7)
    return encoded_labels


def split_save_data(img_features, img_labels, df):
    """Split into stratified train/validation sets, normalize, and save as .npy files."""
    X_train, X_valid, y_train, y_valid = train_test_split(
        img_features,
        img_labels,
        shuffle=True,
        stratify=df.emotion,
        test_size=0.15,
        random_state=42)
    # Normalize pixel values to [0, 1] before saving
    X_train = X_train / 255.
    X_valid = X_valid / 255.
    # Save the arrays to disk as .npy files (a short loading sketch follows this listing)
    np.save('./data/processed/X_train.npy', X_train)
    np.save('./data/processed/X_valid.npy', X_valid)
    np.save('./data/processed/y_train.npy', y_train)
    np.save('./data/processed/y_valid.npy', y_valid)
    print("done.")


if __name__ == '__main__':
    # Refer to the main branch preprocess.py if you want to stream the data file
    # directly from the remote repo (a hedged sketch follows this listing).
    df = read_data()
    img_array = standardize_data(df)
    img_labels = encode_features(df)
    img_features = get_features(img_array)
    split_save_data(img_features, img_labels, df)
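The comment in the __main__ block points to a main-branch variant that streams the raw CSV from the remote repository instead of reading a local copy. That file is not shown here, so the following is only a minimal sketch of the idea using DVC's Python API; the read_data_streamed name, repo URL, and rev are placeholders, and the real main-branch code may differ.

    import pandas as pd
    import dvc.api

    def read_data_streamed():
        # Open the tracked CSV directly from the remote repository, so no local
        # checkout of data/raw/fer2013.csv is needed. The repo URL and rev are
        # placeholders, not the project's actual values.
        with dvc.api.open('data/raw/fer2013.csv',
                          repo='https://dagshub.com/<user>/<repo>',  # placeholder
                          rev='main') as f:
            return pd.read_csv(f)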
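For completeness, a downstream training script would typically load the four arrays that split_save_data() writes under data/processed/. The loader below is an assumption for illustration and is not part of this repository.

    import numpy as np

    def load_processed(processed_dir='./data/processed'):
        # Load the arrays written by split_save_data(); the paths mirror
        # the ones used in the listing above.
        X_train = np.load(f'{processed_dir}/X_train.npy')
        X_valid = np.load(f'{processed_dir}/X_valid.npy')
        y_train = np.load(f'{processed_dir}/y_train.npy')
        y_valid = np.load(f'{processed_dir}/y_valid.npy')
        return X_train, X_valid, y_train, y_valid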