Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gr-index-ratings.sql 3.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  1. --- #dep gr-interactions
  2. --- #dep cluster
  3. --- #table gr.user_info
  4. --- #table gr.interaction
  5. --- #table gr.rating
  6. --- #step Add interaction PK
  7. --- #allow invalid_table_definition
  8. ALTER TABLE gr.raw_interaction ADD CONSTRAINT gr_raw_interaction_pk PRIMARY KEY (gr_interaction_rid);
  9. --- #step Index book clusters
  10. CREATE MATERIALIZED VIEW IF NOT EXISTS gr.book_cluster
  11. AS SELECT DISTINCT gr_book_id, cluster
  12. FROM gr.book_isbn JOIN isbn_cluster USING (isbn_id);
  13. CREATE UNIQUE INDEX IF NOT EXISTS book_cluster_book_idx ON gr.book_cluster (gr_book_id);
  14. CREATE INDEX IF NOT EXISTS book_cluster_cluster_idx ON gr.book_cluster (cluster);
  15. ANALYZE gr.book_cluster;
  16. --- #step Extract user info
  17. CREATE TABLE IF NOT EXISTS gr.user_info (
  18. gr_user_rid SERIAL PRIMARY KEY,
  19. gr_user_id VARCHAR NOT NULL
  20. );
  21. CREATE TEMPORARY TABLE gr_new_users
  22. AS SELECT gr_interaction_data->>'user_id' AS gr_user_id
  23. FROM gr.raw_interaction LEFT JOIN gr.user_info ON (gr_user_id = gr_interaction_data->>'user_id')
  24. WHERE gr_user_rid IS NULL;
  25. INSERT INTO gr.user_info (gr_user_id)
  26. SELECT DISTINCT gr_user_id FROM gr_new_users;
  27. CREATE UNIQUE INDEX IF NOT EXISTS user_id_idx ON gr.user_info (gr_user_id);
  28. ANALYZE gr.user_info;
  29. --- #step Extract interaction data
  30. CREATE TABLE IF NOT EXISTS gr.interaction
  31. AS SELECT gr_interaction_rid, book_id AS gr_book_id, gr_user_rid, rating, (gr_interaction_data->'isRead')::boolean AS is_read, date_added, date_updated
  32. FROM gr.raw_interaction,
  33. jsonb_to_record(gr_interaction_data) AS
  34. x(book_id INTEGER, user_id VARCHAR, rating INTEGER,
  35. date_added TIMESTAMP WITH TIME ZONE, date_updated TIMESTAMP WITH TIME ZONE),
  36. gr.user_info
  37. WHERE user_id = gr_user_id;
  38. DROP VIEW IF EXISTS gr.work_interaction;
  39. CREATE VIEW gr.work_interaction
  40. AS SELECT ix.*, bid.gr_work_id
  41. FROM gr.interaction ix
  42. LEFT JOIN gr.book_ids bid USING (gr_book_id);
  43. --- #step Index interaction data
  44. --- #allow invalid_table_definition
  45. -- if this step is already done, first ALTER TABLE will fail
  46. ALTER TABLE gr.interaction ADD CONSTRAINT gr_interaction_pk PRIMARY KEY (gr_interaction_rid);
  47. CREATE INDEX IF NOT EXISTS interaction_book_idx ON gr.interaction (gr_book_id);
  48. CREATE INDEX IF NOT EXISTS interaction_user_idx ON gr.interaction (gr_user_rid);
  49. ALTER TABLE gr.interaction ADD CONSTRAINT gr_interaction_book_fk FOREIGN KEY (gr_book_id) REFERENCES gr.book_ids (gr_book_id);
  50. ALTER TABLE gr.interaction ADD CONSTRAINT gr_interaction_user_fk FOREIGN KEY (gr_user_rid) REFERENCES gr.user_info (gr_user_rid);
  51. ANALYZE gr.interaction;
  52. --- #step Extract ratings
  53. CREATE MATERIALIZED VIEW IF NOT EXISTS gr.rating
  54. AS SELECT gr_user_rid AS user_id, cluster AS book_id,
  55. MEDIAN(rating) AS rating,
  56. (array_agg(rating ORDER BY date_updated DESC))[1] AS last_rating,
  57. MEDIAN(EXTRACT(EPOCH FROM date_updated)) AS timestamp,
  58. COUNT(rating) AS nratings
  59. FROM gr.interaction
  60. JOIN gr.book_cluster USING (gr_book_id)
  61. WHERE rating > 0
  62. GROUP BY gr_user_rid, cluster;
  63. CREATE INDEX IF NOT EXISTS rating_user_idx ON gr.rating (user_id);
  64. CREATE INDEX IF NOT EXISTS rating_item_idx ON gr.rating (book_id);
  65. ANALYZE gr.rating;
  66. --- #step Extract add actions
  67. CREATE MATERIALIZED VIEW IF NOT EXISTS gr.add_action
  68. AS SELECT gr_user_rid AS user_id, cluster AS book_id,
  69. COUNT(rating) AS nactions,
  70. MIN(EXTRACT(EPOCH FROM date_updated)) AS first_time,
  71. MAX(EXTRACT(EPOCH FROM date_updated)) AS last_time
  72. FROM gr.interaction
  73. JOIN gr.book_cluster USING (gr_book_id)
  74. GROUP BY gr_user_rid, cluster;
  75. CREATE INDEX IF NOT EXISTS add_action_user_idx ON gr.add_action (user_id);
  76. CREATE INDEX IF NOT EXISTS add_action_item_idx ON gr.add_action (book_id);
  77. ANALYZE gr.add_action;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...