Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gr-index-books.sql 4.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  1. --- #dep gr-books
  2. --- #dep gr-works
  3. --- #dep gr-authors
  4. --- #dep gr-book-genres
  5. --- #table gr.work_ids
  6. --- #table gr.book_ids
  7. --- #table gr.book_genres
  8. --- #step Add book PK
  9. --- #allow invalid_table_definition
  10. ALTER TABLE gr.raw_book ADD CONSTRAINT gr_raw_book_pk PRIMARY KEY (gr_book_rid);
  11. --- #step Add work PK
  12. --- #allow invalid_table_definition
  13. ALTER TABLE gr.raw_work ADD CONSTRAINT gr_raw_work_pk PRIMARY KEY (gr_work_rid);
  14. --- #step Add author PK
  15. --- #allow invalid_table_definition
  16. ALTER TABLE gr.raw_author ADD CONSTRAINT gr_raw_author_pk PRIMARY KEY (gr_author_rid);
  17. --- #step Add book-genre PK
  18. --- #allow invalid_table_definition
  19. ALTER TABLE gr.raw_book_genres ADD CONSTRAINT gr_raw_book_genres_pk PRIMARY KEY (gr_book_genres_rid);
  20. --- #step Extract work identifiers
  21. CREATE TABLE IF NOT EXISTS gr.work_ids
  22. AS SELECT gr_work_rid, (gr_work_data->>'work_id')::int AS gr_work_id
  23. FROM gr.raw_work;
  24. --- #step Index work identifiers
  25. --- #allow duplicate_object
  26. --- #allow invalid_table_definition
  27. CREATE UNIQUE INDEX IF NOT EXISTS work_id_idx ON gr.work_ids (gr_work_id);
  28. ALTER TABLE gr.work_ids ADD CONSTRAINT gr_work_id_pk PRIMARY KEY (gr_work_rid);
  29. ALTER TABLE gr.work_ids ADD CONSTRAINT gr_work_id_fk FOREIGN KEY (gr_work_rid) REFERENCES gr.raw_work (gr_work_rid);
  30. ANALYZE gr.work_ids;
  31. --- #step Extract book identifiers
  32. CREATE TABLE IF NOT EXISTS gr.book_ids
  33. AS SELECT gr_book_rid,
  34. NULLIF(work_id, '')::int AS gr_work_id,
  35. book_id AS gr_book_id,
  36. NULLIF(trim(both from asin), '') AS gr_asin,
  37. NULLIF(trim(both from isbn), '') AS gr_isbn,
  38. NULLIF(trim(both from isbn13), '') AS gr_isbn13
  39. FROM gr.raw_book,
  40. jsonb_to_record(gr_book_data) AS x(work_id VARCHAR, book_id INTEGER, asin VARCHAR, isbn VARCHAR, isbn13 VARCHAR);
  41. --- #step Index book identifiers
  42. --- #allow invalid_table_definition
  43. -- If we have completed this step, the PK add will fail with invalid table definition
  44. ALTER TABLE gr.book_ids ADD CONSTRAINT gr_book_id_pk PRIMARY KEY (gr_book_rid);
  45. CREATE UNIQUE INDEX book_id_idx ON gr.book_ids (gr_book_id);
  46. CREATE INDEX book_work_idx ON gr.book_ids (gr_work_id);
  47. CREATE INDEX book_asin_idx ON gr.book_ids (gr_asin);
  48. CREATE INDEX book_isbn_idx ON gr.book_ids (gr_isbn);
  49. CREATE INDEX book_isbn13_idx ON gr.book_ids (gr_isbn13);
  50. ALTER TABLE gr.book_ids ADD CONSTRAINT gr_book_id_fk FOREIGN KEY (gr_book_rid) REFERENCES gr.raw_book (gr_book_rid);
  51. ALTER TABLE gr.book_ids ADD CONSTRAINT gr_book_id_work_fk FOREIGN KEY (gr_work_id) REFERENCES gr.work_ids (gr_work_id);
  52. ANALYZE gr.book_ids;
  53. --- #step Update ISBN ID records with new ISBNs from GoodReads
  54. INSERT INTO isbn_id (isbn)
  55. SELECT DISTINCT gr_isbn FROM gr.book_ids
  56. WHERE gr_isbn IS NOT NULL AND gr_isbn NOT IN (SELECT isbn FROM isbn_id);
  57. INSERT INTO isbn_id (isbn)
  58. SELECT DISTINCT gr_isbn13 FROM gr.book_ids
  59. WHERE gr_isbn13 IS NOT NULL AND gr_isbn13 NOT IN (SELECT isbn FROM isbn_id);
  60. --- #step Update ISBN ID records with ASINs from GoodReads
  61. INSERT INTO isbn_id (isbn)
  62. SELECT DISTINCT gr_asin FROM gr.book_ids
  63. WHERE gr_asin IS NOT NULL AND gr_asin NOT IN (SELECT isbn FROM isbn_id);
  64. --- #step Map ISBNs to book IDs
  65. CREATE TABLE IF NOT EXISTS gr.book_isbn
  66. AS SELECT gr_book_id, isbn_id, COALESCE(bc_of_gr_work(gr_work_id), bc_of_gr_book(gr_book_id)) AS book_code
  67. FROM gr.book_ids, isbn_id
  68. WHERE isbn = gr_isbn OR isbn = gr_isbn13;
  69. --- #step Index GoodReads ISBNs
  70. --- #allow duplicate_object
  71. --- #allow duplicate_table
  72. -- this will fail promptly with duplicate object if the index already exists
  73. CREATE INDEX book_isbn_book_idx ON gr.book_isbn (gr_book_id);
  74. CREATE INDEX book_isbn_isbn_idx ON gr.book_isbn (isbn_id);
  75. CREATE INDEX book_isbn_code_idx ON gr.book_isbn (book_code);
  76. ALTER TABLE gr.book_isbn ADD CONSTRAINT gr_book_isbn_book_fk FOREIGN KEY (gr_book_id) REFERENCES gr.book_ids (gr_book_id);
  77. ALTER TABLE gr.book_isbn ADD CONSTRAINT gr_book_isbn_isbn_fk FOREIGN KEY (isbn_id) REFERENCES isbn_id (isbn_id);
  78. ANALYZE gr.book_isbn;
  79. --- #step Index GoodReads book genres
  80. --- #allow duplicate_table
  81. CREATE TABLE IF NOT EXISTS gr.book_genres
  82. AS SELECT gr_book_rid, gr_book_id, key AS genre, value AS score
  83. FROM gr.raw_book_genres, jsonb_each_text(gr_book_genres_data->'genres'),
  84. gr.book_ids
  85. WHERE gr_book_id = (gr_book_genres_data->>'book_id')::int;
  86. CREATE INDEX bg_book_rid ON gr.book_genres (gr_book_rid);
  87. CREATE INDEX bg_book_id ON gr.book_genres (gr_book_id);
  88. --- #step Extract GoodReads book titles
  89. DROP MATERIALIZED VIEW IF EXISTS gr.work_titles;
  90. CREATE MATERIALIZED VIEW gr.work_titles
  91. AS SELECT gr_work_rid, (gr_work_data->>'work_id')::int AS gr_work_id,
  92. NULLIF(gr_work_data->>'original_title', '') AS work_title
  93. FROM gr.raw_work;
  94. CREATE INDEX gr_work_title_work_idx ON gr.work_titles (gr_work_id);
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...