# Repository: AiswaryaSrinivas/Mayo_Stroke_Blood_Clot_Origin
'''
This module contains the code to train a model that predicts the origin of a blood clot.
'''
import tensorflow as tf
from tensorflow import keras

import cv2

from tensorflow.keras.preprocessing.image import random_rotation,random_shift,random_brightness
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, Callback
from tensorflow.keras.applications import EfficientNetB5
import math
from tensorflow.keras.optimizers import Adam
from config import *
from utils import *

from dagshub.upload import Repo
import glob
# No explicit `import os` is visible in this file; import it here so the
# environment setup below does not depend on a star import providing it.
import os

print("Tensorflow Version is ")
print(tf.__version__)

tf.debugging.set_log_device_placement(True) ## This allows us to see which device is being used for building the model - whether CPU or GPU

print("List of Tensorflow Devices ")
gpus = tf.config.experimental.list_physical_devices("GPU")
# The header above announces the device list, so actually show it.
print(gpus)

# Set before mlflow is imported below; presumably so that large artifact
# uploads do not time out - TODO confirm the unit of this value.
os.environ["AZUREML_ARTIFACTS_DEFAULT_TIMEOUT"] = "3000"

print("AZUREML_ARTIFACTS_DEFAULT_TIMEOUT Timeout Set is ",os.environ["AZUREML_ARTIFACTS_DEFAULT_TIMEOUT"])

import mlflow
import pathlib

# Turn on MLflow automatic logging for the training run that follows.
mlflow.autolog()

def step_decay(epoch):
    '''
    Return the learning rate to use for the given epoch.

    The rate starts at 0.001 and is halved once for every 10 whole epochs
    that have elapsed (epochs 0-9 -> 0.001, epochs 10-19 -> 0.0005, ...).
    '''
    base_rate, decay_factor, step_size = 0.001, 0.5, 10.0
    # Number of completed decay steps; floor keeps the rate flat inside a step.
    completed_steps = math.floor(epoch / step_size)
    return base_rate * math.pow(decay_factor, completed_steps)

def model_EfficentNetB5(efficient_net_weights, lr = 0.001, dr_rate = 0.15):
    '''
    Build and compile a binary classifier on top of a frozen EfficientNetB5.

    efficient_net_weights: forwarded to the `weights` argument of EfficientNetB5.
    lr: learning rate given to the Adam optimizer.
    dr_rate: rate of the Dropout layer in the classification head.

    Returns the compiled Keras Model named "EfficientNet".
    '''
    # Backbone without its classification top; its weights are not trained.
    backbone = EfficientNetB5(include_top=False, weights=efficient_net_weights)
    backbone.trainable = False

    # Classification head, applied in order to the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        BatchNormalization(),
        Dropout(dr_rate),
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        Dense(1, activation="sigmoid"),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(backbone.inputs, tensor, name="EfficientNet")
    classifier.compile(
        optimizer=Adam(learning_rate=lr),
        loss="binary_crossentropy",
        metrics=["binary_accuracy"],
    )
    return classifier


'''
This data generator reads the data from a given path and generates batches of data.
'''


class DataGenerator(keras.utils.Sequence):
    '''
    Keras Sequence that reads images through the streaming client and yields (X, y) batches.

    Each image is read from <data_directory>/<image_id>.png, divided by 255,
    resized to `dimensions`, optionally augmented and stored in X; y holds the
    matching `int_labels` values. Only full batches are produced, so up to
    batch_size - 1 images are left out of every epoch.
    '''

    def __init__(self,dataframe,data_directory,dimensions=(512,512),batch_size: int=16,shuffle=True,num_channels=3,mode="train",rotation_range=None,width_shift_range=None,height_shift_range=None,brightness_range=None,horizontal_flip=False):
        '''
        Initialise the generator.

        dataframe: table with `is_train`, `image_id` and `int_labels` columns.
        data_directory: directory joined with "<image_id>.png" to locate each image.
        dimensions: (rows, cols) of every image placed in a batch.
        batch_size: number of images per batch.
        shuffle: when truthy, the sample order is shuffled every time
            on_epoch_end runs (it also runs once at construction).
        num_channels: size of the last axis of X.
        mode: "train" keeps the rows whose `is_train` is "train"; any other
            value keeps the rows whose `is_train` is "val".
        rotation_range, width_shift_range, height_shift_range, brightness_range:
            forwarded to the Keras random_* helpers when not None.
        horizontal_flip: when truthy, every image is mirrored left-to-right.
        '''
        # Let the Keras base class set up its own state.
        super().__init__()

        self.batch_size = batch_size
        self.dim = dimensions
        self.data_directory = data_directory
        self.shuffle = shuffle
        self.rotation_range = rotation_range
        self.horizontal_flip = horizontal_flip
        self.brightness_range = brightness_range
        self.height_shift_range = height_shift_range
        self.width_shift_range = width_shift_range
        self.fs = create_streaming_client()

        # Anything other than "train" is treated as the validation split.
        split_name = "train" if mode == "train" else "val"
        dat = dataframe[dataframe['is_train'] == split_name].reset_index(drop=True)

        self.images = dat['image_id'].tolist()
        self.labels = dat['int_labels'].tolist()
        self.n_channels = num_channels
        self.n_classes = len(set(self.labels))

        print("Number of Images for "+mode+" is "+str(len(self.images)))
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of full batches per epoch'
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        'Generate the batch at position `index` as an (X, y) pair'
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(indexes)

    def on_epoch_end(self):
        'Rebuilds the sample order after each epoch. If shuffle is set, the order is shuffled'
        print("In On_EPOCH_END")
        self.indexes = np.arange(len(self.images))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Loads, preprocesses and augments the images at `indexes`' # X : (n_samples, *dim, n_channels)
        # Size the buffers by the indexes actually served so that no
        # uninitialised np.empty rows can ever be returned.
        n_samples = len(indexes)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty((n_samples), dtype=int)

        # The Keras random_rotation / random_shift helpers default to
        # channels-first axes (row_axis=1, col_axis=2, channel_axis=0). The
        # images here are (rows, cols, channels), so name the axes explicitly.
        axes = dict(row_axis=0, col_axis=1, channel_axis=2)
        # cv2.resize takes dsize as (width, height) while self.dim is (rows, cols).
        target_size = (self.dim[1], self.dim[0])

        for i, idx in enumerate(indexes):
            image_id = self.images[idx]
            img_path = os.path.join(self.data_directory, image_id+".png")

            ### Read the image using the streaming client
            img = read_images(self.fs, img_path)

            img = img/255
            img = cv2.resize(img, target_size)

            if self.rotation_range is not None:
                img = random_rotation(img, self.rotation_range, **axes)
            if self.width_shift_range is not None:
                img = random_shift(img, wrg=self.width_shift_range, hrg=0, **axes)
            if self.height_shift_range is not None:
                img = random_shift(img, hrg=self.height_shift_range, wrg=0, **axes)
            if self.brightness_range is not None:
                # NOTE(review): random_brightness may rescale pixel values and
                # undo the /255 normalisation above - confirm the output range.
                img = random_brightness(img, brightness_range=self.brightness_range)
            if self.horizontal_flip:
                # NOTE(review): this mirrors every image rather than flipping
                # at random - confirm that is intended.
                img = cv2.flip(img, 1)

            X[i,] = img
            ### Store the label
            y[i] = self.labels[idx]
        print("Data Shape Printing")
        print(X.shape)
        print(y.shape)
        return X, y


### Clone the Git repo before anything else
print("Cloning the Repo")
gitclone()
print("Done Cloning")

### Load the training table and split it into train and val
fs=create_streaming_client()
data=train_split(get_train_dataframe(fs,"train.csv"))

### Create the training and validation generators; both use the same
### data source, augmentation settings and batch size
generator_options=dict(
    dataframe=data,
    data_directory=TRAIN_DATA_PATH,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.2, 1.2],
    batch_size=32,
)
train_data_generator=DataGenerator(mode="train",**generator_options)
validation_data_generator=DataGenerator(mode="validation",**generator_options)

### Callbacks: step-decay learning rate and early stopping on val_loss
lrate=LearningRateScheduler(step_decay)
earstop=EarlyStopping(monitor='val_loss',min_delta=0,patience=3)

### Fetch the EfficientNet weights and build the model on top of them
efficientWeight=download_EfficientNet(fs,'efficientnet-b5_tf24_imagenet_1000_notop.h5')
efficentB5=model_EfficentNetB5(efficientWeight)

### Train for 4 epochs, validating against the validation generator
history_0=efficentB5.fit(
    train_data_generator,
    epochs=4,
    validation_data=validation_data_generator,
    verbose=1,
    callbacks=[lrate,earstop],
)

### Save the trained model under outputs/; the upload globs below expect
### *.pb files plus a variables/ directory inside that folder
print("saving model")
efficentB5.save('outputs/efficientNet_Model')


print("Going to Upload Files to Dagshub")
### Upload the files from the saved model folder to the Dagshub repo

## Step 1: Connect to the repo
repo = Repo("aiswaryasrinivas",DAGSHUB_REPO_NAME, username=DAGSHUB_USERNAME ,password=DAGSHUB_TOKEN)

## Step 2: Upload the top-level *.pb files into the model folder.
## The loop variable is deliberately not called __file__, which would
## overwrite this module's own __file__ attribute.
for model_file in glob.glob("outputs/efficientNet_Model/*.pb"):
    filename=os.path.basename(model_file)
    repo.upload(file=model_file, path="model/efficientNet_Model/"+filename, commit_message = "file added "+filename,versioning="dvc")

## Step 3: Upload everything inside the variables/ sub-folder
for variable_file in glob.glob("outputs/efficientNet_Model/variables/*"):
    print(variable_file)
    filename=os.path.basename(variable_file)
    repo.upload(file=variable_file, path="model/efficientNet_Model/variables/"+filename, commit_message = "file added "+filename,versioning="dvc")