Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gr-book-info.sql 3.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  1. --- #dep gr-books
  2. --- #dep gr-works
  3. --- #dep gr-index-books
  4. --- #dep cluster
  5. --- #table gr.work_title
  6. --- #table gr.book_pub_date
  7. --- #step Create useful GR functions
  8. CREATE OR REPLACE FUNCTION try_date(year VARCHAR, month VARCHAR, day VARCHAR) RETURNS DATE
  9. IMMUTABLE RETURNS NULL ON NULL INPUT PARALLEL UNSAFE
  10. LANGUAGE plpgsql AS $$
  11. BEGIN
  12. RETURN MAKE_DATE(NULLIF(year, '')::INTEGER,
  13. NULLIF(month, '')::INTEGER,
  14. NULLIF(day, '')::INTEGER);
  15. EXCEPTION WHEN SQLSTATE '22008' THEN
  16. RETURN NULL;
  17. END;
  18. $$;
  19. --- #step Index book clusters
  20. CREATE MATERIALIZED VIEW IF NOT EXISTS gr.book_cluster
  21. AS SELECT DISTINCT gr_book_id, cluster
  22. FROM gr.book_isbn JOIN isbn_cluster USING (isbn_id);
  23. CREATE UNIQUE INDEX IF NOT EXISTS book_cluster_book_idx ON gr.book_cluster (gr_book_id);
  24. CREATE INDEX IF NOT EXISTS book_cluster_cluster_idx ON gr.book_cluster (cluster);
  25. ANALYZE gr.book_cluster;
  26. --- #step Extract GoodReads work titles
  27. DROP MATERIALIZED VIEW IF EXISTS gr.work_title;
  28. CREATE MATERIALIZED VIEW gr.work_title
  29. AS SELECT gr_work_rid, (gr_work_data->>'work_id')::int AS gr_work_id,
  30. NULLIF(gr_work_data->>'original_title', '') AS work_title
  31. FROM gr.raw_work;
  32. CREATE INDEX gr_work_title_work_idx ON gr.work_title (gr_work_id);
  33. ANALYZE gr.work_title;
  34. --- #step Extract GoodReads book publication dates
  35. DROP MATERIALIZED VIEW IF EXISTS gr.book_pub_date;
  36. CREATE MATERIALIZED VIEW gr.book_pub_date
  37. AS SELECT gr_book_rid, book_id AS gr_book_id,
  38. NULLIF(publication_year, '')::INTEGER AS pub_year,
  39. NULLIF(publication_month, '')::INTEGER AS pub_month,
  40. NULLIF(publication_day, '')::INTEGER AS pub_day,
  41. try_date(publication_year, publication_month, publication_day) AS pub_date
  42. FROM gr.raw_book,
  43. jsonb_to_record(gr_book_data) AS
  44. x(book_id INTEGER, publication_year VARCHAR,
  45. publication_month VARCHAR, publication_day VARCHAR)
  46. WHERE NULLIF(publication_year, '') IS NOT NULL;
  47. CREATE UNIQUE INDEX gr_bpd_rec_idx ON gr.book_pub_date (gr_book_rid);
  48. CREATE UNIQUE INDEX gr_bpd_book_idx ON gr.book_pub_date (gr_book_id);
  49. ANALYZE gr.book_pub_date;
  50. --- #step Extract GoodReads work original publication dates
  51. DROP MATERIALIZED VIEW IF EXISTS gr.work_pub_date;
  52. CREATE MATERIALIZED VIEW gr.work_pub_date
  53. AS SELECT gr_work_rid, work_id AS gr_work_id,
  54. NULLIF(original_publication_year, '')::INTEGER AS pub_year,
  55. NULLIF(original_publication_month, '')::INTEGER AS pub_month,
  56. NULLIF(original_publication_day, '')::INTEGER AS pub_day,
  57. try_date(original_publication_year, original_publication_month, original_publication_day) AS pub_date
  58. FROM gr.raw_work,
  59. jsonb_to_record(gr_work_data) AS
  60. x(work_id INTEGER, original_publication_year VARCHAR,
  61. original_publication_month VARCHAR, original_publication_day VARCHAR)
  62. WHERE NULLIF(original_publication_year, '') IS NOT NULL;
  63. CREATE UNIQUE INDEX gr_wpd_rec_idx ON gr.work_pub_date (gr_work_rid);
  64. CREATE UNIQUE INDEX gr_wpd_work_idx ON gr.work_pub_date (gr_work_id);
  65. ANALYZE gr.work_pub_date;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...