Browse Source

Next deep learning methods refactor

Witold 8 months ago
parent
commit
3264b5452d

+ 12
- 11
collaborative_filtering/neural_collaborative_filtering.py

@@ -114,7 +114,8 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
         gmf_dot_product = tf.keras.layers.Dot(axes=1)([mf_user_embedding, mf_item_embedding])
         gmf_dot_product = tf.keras.layers.Flatten()(gmf_dot_product)
 
-        final_dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid, trainable=True, name="gmf_dense_output")(
+        final_dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid, trainable=True,
+                                            name="gmf_dense_output")(
             gmf_dot_product)
 
         model = tf.keras.models.Model([user_inputs, item_inputs], final_dense)
@@ -279,7 +280,7 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
 
         train_ds = self._generate_dataset(train_user_item_ratings, batch_size)
 
-        loss_object = tf.keras.losses.MeanSquaredError()
+        loss_object = tf.keras.losses.BinaryCrossentropy()
         optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
 
         done_epochs = 0
@@ -301,7 +302,7 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
 
         self.train_loss.reset_states()
 
-        loss_object = tf.keras.losses.MeanSquaredError()
+        loss_object = tf.keras.losses.BinaryCrossentropy()
         optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
 
         done_epochs = 0
@@ -328,8 +329,8 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
     def fit(self, train_user_items_ratings, test_user_items_ratings=None, batch_size=20, epochs=20, early_stopping=-1):
         self.user_ratings = create_user_items_rating_matrix_w_indexer(train_user_items_ratings, self.indexer)
 
-        train_user_items_ratings.extend(self._generate_negative_samples(train_user_items_ratings, 3))
-        gmf_model, mlp_model = self._pretrain_models(train_user_items_ratings, epochs=30, error_delta=0.005)
+        #train_user_items_ratings.extend(self._generate_negative_samples(train_user_items_ratings, 1))
+        gmf_model, mlp_model = self._pretrain_models(train_user_items_ratings, epochs=30, error_delta=0.0001)
 
         self.model = self._build_neu_mf_model(pretrained_gmf_model=gmf_model, pretrained_mlp_model=mlp_model)
 
@@ -339,7 +340,7 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
         if eval_test:
             test_ds = self._generate_dataset(test_user_items_ratings, batch_size)
 
-        loss_object = tf.keras.losses.MeanSquaredError()
+        loss_object = tf.keras.losses.BinaryCrossentropy()
         optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
 
         prev_test_loss = float("inf")
@@ -384,12 +385,12 @@ class NeuralCollaborativeFiltering(RecommendationMethod):
         item_input = np.array([self.indexer.get_user_internal_id(item_id)])
         return self.model.predict([user_input, item_input]).squeeze().tolist()
 
-    def get_recommendations(self, user_id, k):
-        user_input = np.full((self.num_items, 1), fill_value=self.indexer.get_user_internal_id[user_id])
+    def get_recommendations(self, user_id, k=None):
+        user_input = np.full((self.num_items, 1), fill_value=self.indexer.get_user_internal_id(user_id))
         item_input = np.expand_dims(np.arange(0, self.num_items), axis=1)
 
-        non_rated_user_movies = self.user_ratings[self.indexer.get_user_internal_id(user_id), :] == 0
         recommendations = self.model.predict([user_input, item_input]).squeeze()
         recommendations_idx = np.argsort(recommendations)[::-1]
-        return [self.indexer.get_movie_id(i) for i in recommendations_idx if
-                non_rated_user_movies[i]][:k]
+        recommendations_idx = [self.indexer.get_movie_id(internal_id) for internal_id in recommendations_idx]
+        return recommendations_idx[:k] if k is not None else recommendations_idx
+

File diff suppressed because it is too large
+ 453
- 453
data/generated/movies_data_df.csv

+ 5
- 3
deep_learning/movie_features_deep_learning_method.py

@@ -73,12 +73,14 @@ class MovieFeaturesDeepLearningMethod(RecommendationMethod):
         self.model = self._build_model()
         self.model.load_weights(filepath=filepath)
 
-    def get_recommendations(self, user, movies, k=None):
-        user_input = np.repeat(np.expand_dims(user, axis=0), movies.shape[0], axis=0)
-        movies_input = movies
+    def get_recommendations(self, user_id, k=None):
+        user = self.user_features[self.indexer.get_user_internal_id(user_id), :]
+        user_input = np.repeat(np.expand_dims(user, axis=0), self.movies_features.shape[0], axis=0)
+        movies_input = self.movies_features
 
         recommendations = self.model.predict([user_input, movies_input]).squeeze()
         recommendations_idx = np.argsort(recommendations)[::-1]
+        recommendations_idx = [self.indexer.get_movie_id(internal_id) for internal_id in recommendations_idx]
         return recommendations_idx[:k] if k is not None else recommendations_idx
 
     def _generate_negative_samples(self, data, count_for_one_user):

+ 11
- 10
deep_learning/scripts/run_deep_learning_method.py

@@ -30,7 +30,7 @@ def map_df_to_model_input(data_df):
     data = data_df[[user_column, item_column, rating_column]].values
     return [(int(r[0]),
              int(r[1]),
-             r[2]) for r in data]
+             r[2] / 5) for r in data]
 
 
 def main():
@@ -43,7 +43,8 @@ def main():
     user_ratings_df = user_ratings_df[user_ratings_df[item_column].isin(movie_id_features_dict.keys())]
 
     train_ratings_df, test_ratings_df = \
-        list(user_leave_on_out(user_ratings_df, timestamp_column='timestamp', make_user_folds=False))[0]
+        list(user_leave_on_out(user_ratings_df, timestamp_column='timestamp', make_user_folds=False,
+                               rating_threshold=3.5))[0]
 
     movies_features, user_features, indexer = get_movies_model_features(user_ratings_df, train_ratings_df, dataset_path,
                                                                         path_to_saved_features)
@@ -51,25 +52,25 @@ def main():
     train_data = map_df_to_model_input(train_ratings_df)
     test_data = map_df_to_model_input(test_ratings_df)
 
-    #method = NeuralCollaborativeFiltering(indexer=indexer, n_factors=128, model_name="neucf_128_wo_threshold")
+    # method = NeuralCollaborativeFiltering(indexer=indexer, n_factors=64, model_name="neucf_64_w_4_0_threshold")
 
     method = MovieFeaturesDeepLearningMethod(indexer=indexer, movies_features=movies_features,
-                                             user_features=user_features, model_name="movies_features")
+                                             user_features=user_features, model_name="movies_features_w_4_0_threshold")
 
-    #method.fit(train_data, test_data, batch_size=50, epochs=50, early_stopping=-1)
+    # method.fit(train_data, test_data, batch_size=25, epochs=50, early_stopping=3)
 
-    method.load_model(filepath=os.path.join(PATH_TO_PROJECT, "deep_learning", "saved_models", "movies_features.h5"))
+    # method.load_model(filepath=os.path.join(PATH_TO_PROJECT, "collaborative_filtering", "saved_models", "neucf_64_w_4_0_threshold.h5"), train_user_item_ratings=train_data)
+    method.load_model(filepath=os.path.join(PATH_TO_PROJECT, "deep_learning", "saved_models", "movies_features_w_4_0_threshold.h5"))
 
     for user, movie, rating in test_data[:10]:
-        recommendations = method.get_recommendations(user_features[indexer.get_user_internal_id(user), :],
-                                                     movies_features)
+        recommendations = method.get_recommendations(user_id=user)
 
         user_rated_movies = train_ratings_df[train_ratings_df[user_column] == user] \
             .sort_values(rating_column, ascending=False)[[item_column]] \
             .values.squeeze()
 
-        recommended_movies = [indexer.get_movie_id(movie_internal_id) for movie_internal_id in recommendations if
-                              movie_internal_id not in user_rated_movies][:30]
+        recommended_movies = [movie_internal_id for movie_internal_id in recommendations if
+                              movie_internal_id not in user_rated_movies][:10]
 
         print("Rated movies: ")
         for movie_id in user_rated_movies: