Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

ol-index.sql 9.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
  1. --- #dep ol-authors
  2. --- #dep ol-editions
  3. --- #dep ol-works
  4. --- #step Index OL author table
  5. --- #allow invalid_table_definition
  6. CREATE INDEX IF NOT EXISTS author_key_idx ON ol.author (author_key);
  7. ALTER TABLE ol.author ADD PRIMARY KEY (author_id);
  8. --- #step Index OL work table
  9. --- #allow invalid_table_definition
  10. CREATE INDEX IF NOT EXISTS work_key_idx ON ol.work (work_key);
  11. ALTER TABLE ol.work ADD PRIMARY KEY (work_id);
  12. --- #step Index OL edition table
  13. --- #allow invalid_table_definition
  14. CREATE INDEX IF NOT EXISTS edition_key_idx ON ol.edition (edition_key);
  15. ALTER TABLE ol.edition ADD PRIMARY KEY (edition_id);
  16. --- #step Set up work-author join table
  17. CREATE TABLE IF NOT EXISTS ol.work_authors
  18. AS SELECT work_id, author_id
  19. FROM ol.author
  20. JOIN (SELECT work_id, jsonb_array_elements((work_data->'authors')) #>> '{author,key}' AS author_key FROM ol.work) w
  21. USING (author_key);
  22. --- #step Index work-author join table
  23. CREATE INDEX work_author_wk_idx ON ol.work_authors (work_id);
  24. CREATE INDEX work_author_au_idx ON ol.work_authors (author_id);
  25. --- #step Author work FK
  26. --- #allow duplicate_object
  27. ALTER TABLE ol.work_authors ADD CONSTRAINT work_author_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  28. --- #step Author-work author FK
  29. --- #allow duplicate_object
  30. ALTER TABLE ol.work_authors ADD CONSTRAINT work_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  31. --- #step Set up work first author table
  32. CREATE TABLE IF NOT EXISTS ol.work_first_author
  33. AS SELECT work_id, author_id
  34. FROM ol.author
  35. JOIN (SELECT work_id, work_data #>> '{authors,0,author,key}' AS author_key FROM ol.work) w
  36. USING (author_key);
  37. --- #step Index work first author table
  38. CREATE INDEX IF NOT EXISTS work_first_author_wk_idx ON ol.work_first_author (work_id);
  39. CREATE INDEX IF NOT EXISTS work_first_author_au_idx ON ol.work_first_author (author_id);
  40. --- #step First author work FK
  41. --- #allow duplicate_object
  42. ALTER TABLE ol.work_first_author ADD CONSTRAINT work_first_author_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  43. --- #step First author author FK
  44. --- #allow duplicate_object
  45. ALTER TABLE ol.work_first_author ADD CONSTRAINT work_first_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  46. --- #step Set up edition-author join table
  47. CREATE TABLE IF NOT EXISTS ol.edition_author
  48. AS SELECT edition_id, author_id
  49. FROM ol.author
  50. JOIN (SELECT edition_id, jsonb_array_elements((edition_data->'authors')) ->> 'key' AS author_key
  51. FROM ol.edition) e
  52. USING (author_key);
  53. --- #step index edition-author join table
  54. CREATE INDEX IF NOT EXISTS edition_author_ed_idx ON ol.edition_author (edition_id);
  55. CREATE INDEX IF NOT EXISTS edition_author_au_idx ON ol.edition_author (author_id);
  56. --- #allow duplicate_object
  57. ALTER TABLE ol.edition_author ADD CONSTRAINT edition_author_wk_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  58. --- #allow duplicate_object
  59. ALTER TABLE ol.edition_author ADD CONSTRAINT edition_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  60. --- #step Create edition first-author table
  61. CREATE TABLE ol.edition_first_author
  62. AS SELECT edition_id, author_id
  63. FROM ol.author
  64. JOIN (SELECT edition_id, edition_data #>> '{authors,0,key}' AS author_key
  65. FROM ol.edition) e
  66. USING (author_key);
  67. --- #step Index edition first-author table
  68. CREATE INDEX IF NOT EXISTS edition_first_author_ed_idx ON ol.edition_first_author (edition_id);
  69. CREATE INDEX IF NOT EXISTS edition_first_author_au_idx ON ol.edition_first_author (author_id);
  70. --- #step Edition first author edition FK
  71. --- #allow duplicate_object
  72. ALTER TABLE ol.edition_first_author ADD CONSTRAINT edition_first_author_wk_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  73. --- #step Edition first author author FK
  74. --- #allow duplicate_object
  75. ALTER TABLE ol.edition_first_author ADD CONSTRAINT edition_first_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  76. --- #step Set up edition-work join table
  77. CREATE TABLE IF NOT EXISTS ol.edition_work
  78. AS SELECT edition_id, work_id
  79. FROM ol.work
  80. JOIN (SELECT edition_id, jsonb_array_elements((edition_data->'works')) ->> 'key' AS work_key FROM ol.edition) w
  81. USING (work_key);
  82. --- #step Index edition-work join table
  83. CREATE INDEX IF NOT EXISTS edition_work_ed_idx ON ol.edition_work (edition_id);
  84. CREATE INDEX IF NOT EXISTS edition_work_au_idx ON ol.edition_work (work_id);
  85. --- #step Edition-work edition FK
  86. --- #allow duplicate_object
  87. ALTER TABLE ol.edition_work ADD CONSTRAINT edition_work_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  88. --- #step Edition-work work FK
  89. --- #allow duplicate_object
  90. ALTER TABLE ol.edition_work ADD CONSTRAINT edition_work_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  91. --- #step Extract ISBNs and ASINs
  92. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.edition_isbn10
  93. AS SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_10') AS isbn
  94. FROM ol.edition;
  95. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.edition_isbn13
  96. AS SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_13') AS isbn
  97. FROM ol.edition;
  98. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.edition_asin
  99. AS SELECT edition_id, jsonb_array_elements_text(edition_data#>'{identifiers,amazon}') AS asin
  100. FROM ol.edition;
  101. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.edition_lccn
  102. AS SELECT edition_id, jsonb_array_elements_text(edition_data->'lccn') AS lccn
  103. FROM ol.edition;
  104. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.edition_gr_bid
  105. AS SELECT edition_id, jsonb_array_elements_text(edition_data#>'{identifiers,goodreads}') AS gr_book_rid
  106. FROM ol.edition;
  107. --- #step Integrate ISBN/ASIN identifiers
  108. DROP TABLE IF EXISTS ol.edition_isbn CASCADE;
  109. CREATE TABLE ol.edition_isbn (
  110. edition_id INTEGER NOT NULL,
  111. isbn VARCHAR NOT NULL
  112. );
  113. WITH
  114. ol_edition_isbn10
  115. AS (SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_10') AS isbn
  116. FROM ol.edition)
  117. INSERT INTO ol.edition_isbn
  118. SELECT edition_id, isbn
  119. FROM (SELECT edition_id, extract_isbn(isbn) AS isbn
  120. FROM ol_edition_isbn10) isbns
  121. WHERE isbn IS NOT NULL AND char_length(isbn) IN (10,13);
  122. WITH
  123. ol_edition_isbn13
  124. AS (SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_13') AS isbn
  125. FROM ol.edition)
  126. INSERT INTO ol.edition_isbn
  127. SELECT edition_id, isbn
  128. FROM (SELECT edition_id, extract_isbn(isbn) AS isbn
  129. FROM ol_edition_isbn13) isbns
  130. WHERE isbn IS NOT NULL AND char_length(isbn) IN (10,13);
  131. WITH
  132. ol_edition_asin AS
  133. (SELECT edition_id, jsonb_array_elements_text(edition_data#>'{identifiers,amazon}') AS asin
  134. FROM ol.edition)
  135. INSERT INTO ol.edition_isbn
  136. SELECT edition_id, regexp_replace(upper(asin), '[- ]', '')
  137. FROM ol_edition_asin
  138. WHERE regexp_replace(upper(asin), '[- ]', '') ~ '^[A-Z0-9]{10}$';
  139. CREATE INDEX edition_isbn_ed_idx ON ol.edition_isbn (edition_id);
  140. CREATE INDEX edition_isbn_idx ON ol.edition_isbn (isbn);
  141. ALTER TABLE ol.edition_isbn ADD CONSTRAINT edition_work_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  142. ANALYZE ol.edition_isbn;
  143. --- #step Add OL ISBNs to global table
  144. INSERT INTO isbn_id (isbn)
  145. SELECT DISTINCT isbn FROM ol.edition_isbn
  146. WHERE isbn NOT IN (SELECT DISTINCT isbn FROM isbn_id);
  147. ANALYZE isbn_id;
  148. --- #step Link OL ISBNs
  149. DROP TABLE IF EXISTS ol.isbn_link CASCADE;
  150. CREATE TABLE ol.isbn_link (
  151. isbn_id INTEGER NOT NULL,
  152. edition_id INTEGER NOT NULL,
  153. work_id INTEGER NULL,
  154. book_code INTEGER NOT NULL
  155. );
  156. INSERT INTO ol.isbn_link
  157. SELECT isbn_id, edition_id, work_id,
  158. COALESCE(bc_of_work(work_id), bc_of_edition(edition_id))
  159. FROM isbn_id JOIN ol.edition_isbn USING (isbn) LEFT JOIN ol.edition_work USING (edition_id);
  160. CREATE INDEX isbn_link_ed_idx ON ol.isbn_link (edition_id);
  161. CREATE INDEX isbn_link_wk_idx ON ol.isbn_link (work_id);
  162. CREATE INDEX isbn_link_bc_idx ON ol.isbn_link (book_code);
  163. CREATE INDEX isbn_link_isbn_idx ON ol.isbn_link (isbn_id);
  164. ALTER TABLE ol.isbn_link ADD CONSTRAINT ol_isbn_link_work_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  165. ALTER TABLE ol.isbn_link ADD CONSTRAINT ol_isbn_link_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  166. ANALYZE ol.isbn_link;
  167. --- #step Tabulate author names
  168. DROP TABLE IF EXISTS ol.author_name CASCADE;
  169. CREATE TABLE ol.author_name (
  170. author_id INTEGER NOT NULL,
  171. author_name VARCHAR NOT NULL,
  172. name_source VARCHAR NOT NULL
  173. );
  174. INSERT INTO ol.author_name
  175. SELECT author_id, author_data ->> 'name', 'name'
  176. FROM ol.author WHERE author_data->>'name' IS NOT NULL;
  177. INSERT INTO ol.author_name
  178. SELECT author_id, author_data ->> 'personal_name', 'personal'
  179. FROM ol.author WHERE author_data ? 'personal_name';
  180. INSERT INTO ol.author_name
  181. SELECT author_id, jsonb_array_elements_text(author_data -> 'alternate_names'),
  182. 'alternate'
  183. FROM ol.author WHERE author_data ? 'alternate_names';
  184. CREATE INDEX author_name_idx ON ol.author_name (author_id);
  185. CREATE INDEX author_name_name_idx ON ol.author_name (author_name);
  186. ANALYZE ol.author_name;
  187. --- #step Catalog work subjects
  188. CREATE MATERIALIZED VIEW IF NOT EXISTS ol.work_subject
  189. AS SELECT work_id, jsonb_array_elements_text(work_data->'subjects') AS subject
  190. FROM ol.work;
  191. CREATE INDEX IF NOT EXISTS work_subject_work_idx ON ol.work_subject (work_id);
  192. CREATE INDEX IF NOT EXISTS work_subject_subj_idx ON ol.work_subject (subject);
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...