Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

ol-index.sql 8.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  1. -- Create indexes and constraints
  2. ALTER TABLE ol.author ADD PRIMARY KEY (author_id);
  3. CREATE INDEX ol.author_key_idx ON ol.author (author_key);
  4. ALTER TABLE ol.work ADD PRIMARY KEY (work_id);
  5. CREATE INDEX ol.work_key_idx ON ol.work (work_key);
  6. ALTER TABLE ol.edition ADD PRIMARY KEY (edition_id);
  7. CREATE INDEX ol.edition_key_idx ON ol.edition (edition_key);
  8. -- Set up work-author join table
  9. CREATE TABLE ol.work_authors
  10. AS SELECT work_id, author_id
  11. FROM ol.author
  12. JOIN (SELECT work_id, jsonb_array_elements((work_data->'authors')) #>> '{author,key}' AS author_key FROM ol.work) w
  13. USING (author_key);
  14. CREATE INDEX ol.work_author_wk_idx ON ol.work_authors (work_id);
  15. CREATE INDEX ol.work_author_au_idx ON ol.work_authors (author_id);
  16. ALTER TABLE ol.work_authors ADD CONSTRAINT work_author_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  17. ALTER TABLE ol.work_authors ADD CONSTRAINT work_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  18. CREATE TABLE ol.work_first_author
  19. AS SELECT work_id, author_id
  20. FROM ol.author
  21. JOIN (SELECT work_id, work_data #>> '{authors,0,author,key}' AS author_key FROM ol.work) w
  22. USING (author_key);
  23. CREATE INDEX ol.work_first_author_wk_idx ON ol.work_first_author (work_id);
  24. CREATE INDEX ol.work_first_author_au_idx ON ol.work_first_author (author_id);
  25. ALTER TABLE ol.work_first_author ADD CONSTRAINT work_first_author_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  26. ALTER TABLE ol.work_first_author ADD CONSTRAINT work_first_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  27. -- Set up edition-author join table
  28. CREATE TABLE ol.edition_author
  29. AS SELECT edition_id, author_id
  30. FROM ol.author
  31. JOIN (SELECT edition_id, jsonb_array_elements((edition_data->'authors')) ->> 'key' AS author_key
  32. FROM ol.edition) e
  33. USING (author_key);
  34. CREATE INDEX ol.edition_author_ed_idx ON ol.edition_author (edition_id);
  35. CREATE INDEX ol.edition_author_au_idx ON ol.edition_author (author_id);
  36. ALTER TABLE ol.edition_author ADD CONSTRAINT edition_author_wk_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  37. ALTER TABLE ol.edition_author ADD CONSTRAINT edition_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  38. CREATE TABLE ol.edition_first_author
  39. AS SELECT edition_id, author_id
  40. FROM ol.author
  41. JOIN (SELECT edition_id, edition_data #>> '{authors,0,key}' AS author_key
  42. FROM ol.edition) e
  43. USING (author_key);
  44. CREATE INDEX ol.edition_first_author_ed_idx ON ol.edition_first_author (edition_id);
  45. CREATE INDEX ol.edition_first_author_au_idx ON ol.edition_first_author (author_id);
  46. ALTER TABLE ol.edition_first_author ADD CONSTRAINT edition_first_author_wk_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  47. ALTER TABLE ol.edition_first_author ADD CONSTRAINT edition_first_author_au_fk FOREIGN KEY (author_id) REFERENCES ol.author;
  48. -- Set up edition-work join table
  49. CREATE TABLE ol.edition_work
  50. AS SELECT edition_id, work_id
  51. FROM ol.work
  52. JOIN (SELECT edition_id, jsonb_array_elements((edition_data->'works')) ->> 'key' AS work_key FROM ol.edition) w
  53. USING (work_key);
  54. CREATE INDEX ol.edition_work_ed_idx ON ol.edition_work (edition_id);
  55. CREATE INDEX ol.edition_work_au_idx ON ol.edition_work (work_id);
  56. ALTER TABLE ol.edition_work ADD CONSTRAINT edition_work_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  57. ALTER TABLE ol.edition_work ADD CONSTRAINT edition_work_wk_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  58. -- Set up work and author summary info
  59. CREATE MATERIALIZED VIEW ol.work_meta
  60. AS SELECT work_id, work_key, length(work_data::text) AS work_desc_length
  61. FROM ol.work;
  62. CREATE INDEX ol.work_meta_work_idx ON ol.work_meta (work_id);
  63. CREATE INDEX ol.work_meta_key_idx ON ol.work_meta (work_key);
  64. CREATE MATERIALIZED VIEW ol.edition_meta
  65. AS SELECT edition_id, edition_key, length(edition_data::text) AS edition_desc_length
  66. FROM ol.edition;
  67. CREATE INDEX ol.edition_meta_edition_idx ON ol.edition_meta (edition_id);
  68. CREATE INDEX ol.edition_meta_key_idx ON ol.edition_meta (edition_key);
  69. -- Extract ISBNs (and ASINs)
  70. CREATE MATERIALIZED VIEW ol_edition_isbn10
  71. AS SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_10') AS isbn
  72. FROM ol.edition;
  73. CREATE MATERIALIZED VIEW ol_edition_isbn13
  74. AS SELECT edition_id, jsonb_array_elements_text(edition_data->'isbn_13') AS isbn
  75. FROM ol.edition;
  76. CREATE MATERIALIZED VIEW ol_edition_asin
  77. AS SELECT edition_id, jsonb_array_elements_text(edition_data#>'{identifiers,amazon}') AS asin
  78. FROM ol.edition;
  79. -- Integrate ISBN/ASIN identifiers
  80. CREATE TABLE ol.edition_isbn (
  81. edition_id INTEGER NOT NULL,
  82. isbn VARCHAR NOT NULL
  83. );
  84. INSERT INTO ol.edition_isbn
  85. SELECT edition_id, isbn
  86. FROM (SELECT edition_id,
  87. regexp_replace(substring(upper(isbn) from '^\s*(?:(?:ISBN)?[:;z]?\s*)?([0-9 -]+[0-9X])'), '[- ]', '') AS isbn
  88. FROM ol_edition_isbn10) isbns
  89. WHERE isbn IS NOT NULL AND char_length(isbn) IN (10,13);
  90. INSERT INTO ol.edition_isbn
  91. SELECT edition_id, isbn
  92. FROM (SELECT edition_id,
  93. regexp_replace(substring(upper(isbn) from '^\s*(?:(?:ISBN)?[:;z]?\s*)?([0-9 -]+[0-9X])'), '[- ]', '') AS isbn
  94. FROM ol_edition_isbn13) isbns
  95. WHERE isbn IS NOT NULL AND char_length(isbn) IN (10,13);
  96. -- Don't bother with this, there are only 4K ASINs in the database
  97. -- INSERT INTO ol.edition_isbn
  98. -- SELECT edition_id, regexp_replace(upper(asin), '[- ]', '')
  99. -- FROM ol_edition_asin
  100. -- WHERE regexp_replace(upper(asin), '[- ]', '') ~ '^[A-Z0-9]{10}$';
  101. CREATE INDEX ol.edition_isbn_ed_idx ON ol.edition_isbn (edition_id);
  102. CREATE INDEX ol.edition_isbn_idx ON ol.edition_isbn (isbn);
  103. ALTER TABLE ol.edition_isbn ADD CONSTRAINT edition_work_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  104. ANALYZE ol.edition_isbn;
  105. INSERT INTO isbn_id (isbn)
  106. SELECT DISTINCT isbn FROM ol.edition_isbn
  107. WHERE isbn NOT IN (SELECT isbn FROM isbn_id);
  108. ANALYZE isbn_id;
  109. CREATE TABLE ol.isbn_link (
  110. isbn_id INTEGER NOT NULL,
  111. edition_id INTEGER NOT NULL,
  112. work_id INTEGER NULL,
  113. book_code INTEGER NOT NULL
  114. );
  115. INSERT INTO ol.isbn_link
  116. SELECT isbn_id, edition_id, work_id,
  117. COALESCE(bc_of_work(work_id), bc_of_edition(edition_id))
  118. FROM isbn_id JOIN ol.edition_isbn USING (isbn) LEFT JOIN ol.edition_work USING (edition_id);
  119. CREATE INDEX ol.isbn_link_ed_idx ON ol.isbn_link (edition_id);
  120. CREATE INDEX ol.isbn_link_wk_idx ON ol.isbn_link (work_id);
  121. CREATE INDEX ol.isbn_link_bc_idx ON ol.isbn_link (book_code);
  122. CREATE INDEX ol.isbn_link_isbn_idx ON ol.isbn_link (isbn_id);
  123. ALTER TABLE ol.isbn_link ADD CONSTRAINT ol_isbn_link_work_fk FOREIGN KEY (work_id) REFERENCES ol.work;
  124. ALTER TABLE ol.isbn_link ADD CONSTRAINT ol_isbn_link_ed_fk FOREIGN KEY (edition_id) REFERENCES ol.edition;
  125. ANALYZE ol.isbn_link;
  126. -- Set up a general author names table, for all known names
  127. CREATE TABLE ol.author_name (
  128. author_id INTEGER NOT NULL,
  129. author_name VARCHAR NOT NULL,
  130. name_source VARCHAR NOT NULL
  131. );
  132. INSERT INTO ol.author_name
  133. SELECT author_id, author_data ->> 'name', 'name'
  134. FROM ol.author WHERE author_data->>'name' IS NOT NULL;
  135. INSERT INTO ol.author_name
  136. SELECT author_id, author_data ->> 'personal_name', 'personal'
  137. FROM ol.author WHERE author_data ? 'personal_name';
  138. INSERT INTO ol.author_name
  139. SELECT author_id, jsonb_array_elements_text(author_data -> 'alternate_names'),
  140. 'alternate'
  141. FROM ol.author WHERE author_data ? 'alternate_names';
  142. CREATE INDEX ol.author_name_idx ON ol.author_name (author_id);
  143. CREATE INDEX ol.author_name_name_idx ON ol.author_name (author_name);
  144. ANALYZE ol.author_name;
  145. -- DROP MATERIALIZED VIEW IF EXISTS ol_book_first_author CASCADE;
  146. -- CREATE MATERIALIZED VIEW ol_book_first_author
  147. -- AS SELECT DISTINCT book_id, first_value(author_id) OVER (PARTITION BY book_id ORDER BY edition_desc_length) AS author_id
  148. -- FROM ol_isbn_book_id
  149. -- JOIN ol.edition_isbn USING (isbn)
  150. -- JOIN ol.edition_first_author USING (edition_id)
  151. -- JOIN ol.edition_meta USING (edition_id)
  152. -- WHERE author_id IS NOT NULL;
  153. -- CREATE INDEX book_first_author_book_idx ON ol_book_first_author (book_id);
  154. CREATE MATERIALIZED VIEW ol.work_subject
  155. AS SELECT work_id, jsonb_array_elements_text(work_data->'subjects') AS subject
  156. FROM ol.work;
  157. CREATE INDEX ol.work_subject_work_idx ON ol.work_subject (work_id);
  158. CREATE INDEX ol.work_subject_subj_idx ON ol.work_subject (subject);
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...