Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gr-book-info.sql 2.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
  1. --- #dep gr-books
  2. --- #dep gr-works
  3. --- #dep gr-index-books
  4. --- #table gr.work_title
  5. --- #table gr.book_pub_date
  6. --- #step Create useful GR functions
  7. CREATE OR REPLACE FUNCTION try_date(year VARCHAR, month VARCHAR, day VARCHAR) RETURNS DATE
  8. IMMUTABLE RETURNS NULL ON NULL INPUT PARALLEL UNSAFE
  9. LANGUAGE plpgsql AS $$
  10. BEGIN
  11. RETURN MAKE_DATE(NULLIF(year, '')::INTEGER,
  12. NULLIF(month, '')::INTEGER,
  13. NULLIF(day, '')::INTEGER);
  14. EXCEPTION WHEN SQLSTATE '22008' THEN
  15. RETURN NULL;
  16. END;
  17. $$;
  18. --- #step Extract GoodReads work titles
  19. DROP MATERIALIZED VIEW IF EXISTS gr.work_title;
  20. CREATE MATERIALIZED VIEW gr.work_title
  21. AS SELECT gr_work_rid, (gr_work_data->>'work_id')::int AS gr_work_id,
  22. NULLIF(gr_work_data->>'original_title', '') AS work_title
  23. FROM gr.raw_work;
  24. CREATE INDEX gr_work_title_work_idx ON gr.work_title (gr_work_id);
  25. ANALYZE gr.work_title;
  26. --- #step Extract GoodReads book publication dates
  27. DROP MATERIALIZED VIEW IF EXISTS gr.book_pub_date;
  28. CREATE MATERIALIZED VIEW gr.book_pub_date
  29. AS SELECT gr_book_rid, book_id AS gr_book_id,
  30. NULLIF(publication_year, '')::INTEGER AS pub_year,
  31. NULLIF(publication_month, '')::INTEGER AS pub_month,
  32. NULLIF(publication_day, '')::INTEGER AS pub_day,
  33. try_date(publication_year, publication_month, publication_day) AS pub_date
  34. FROM gr.raw_book,
  35. jsonb_to_record(gr_book_data) AS
  36. x(book_id INTEGER, publication_year VARCHAR,
  37. publication_month VARCHAR, publication_day VARCHAR)
  38. WHERE NULLIF(publication_year, '') IS NOT NULL;
  39. CREATE UNIQUE INDEX gr_bpd_rec_idx ON gr.book_pub_date (gr_book_rid);
  40. CREATE UNIQUE INDEX gr_bpd_book_idx ON gr.book_pub_date (gr_book_id);
  41. ANALYZE gr.book_pub_date;
  42. --- #step Extract GoodReads work original publication dates
  43. DROP MATERIALIZED VIEW IF EXISTS gr.work_pub_date;
  44. CREATE MATERIALIZED VIEW gr.work_pub_date
  45. AS SELECT gr_work_rid, work_id AS gr_work_id,
  46. NULLIF(original_publication_year, '')::INTEGER AS pub_year,
  47. NULLIF(original_publication_month, '')::INTEGER AS pub_month,
  48. NULLIF(original_publication_day, '')::INTEGER AS pub_day,
  49. try_date(original_publication_year, original_publication_month, original_publication_day) AS pub_date
  50. FROM gr.raw_work,
  51. jsonb_to_record(gr_work_data) AS
  52. x(work_id INTEGER, original_publication_year VARCHAR,
  53. original_publication_month VARCHAR, original_publication_day VARCHAR)
  54. WHERE NULLIF(original_publication_year, '') IS NOT NULL;
  55. CREATE UNIQUE INDEX gr_wpd_rec_idx ON gr.work_pub_date (gr_work_rid);
  56. CREATE UNIQUE INDEX gr_wpd_work_idx ON gr.work_pub_date (gr_work_id);
  57. ANALYZE gr.work_pub_date;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...