Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

gr-index-books.sql 2.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
  1. -- Index GoodReads book data
  2. ALTER TABLE gr_raw_book ADD CONSTRAINT gr_raw_book_pk PRIMARY KEY (gr_book_rid);
  3. ALTER TABLE gr_raw_work ADD CONSTRAINT gr_raw_work_pk PRIMARY KEY (gr_work_rid);
  4. ALTER TABLE gr_raw_author ADD CONSTRAINT gr_raw_author_pk PRIMARY KEY (gr_author_rid);
  5. ALTER TABLE gr_raw_interaction ADD CONSTRAINT gr_raw_interaction_pk PRIMARY KEY (gr_int_rid);
  6. -- Extract work identifiers from GoodReads work records
  7. CREATE TABLE gr_work_ids
  8. AS SELECT gr_work_rid, (gr_work_data->>'work_id')::int AS gr_work_id
  9. FROM gr_raw_work;
  10. ALTER TABLE gr_work_ids ADD CONSTRAINT gr_work_id_pk PRIMARY KEY (gr_work_rid);
  11. CREATE UNIQUE INDEX gr_work_id_idx ON gr_work_ids (gr_work_id);
  12. ALTER TABLE gr_work_ids ADD CONSTRAINT gr_work_id_fk FOREIGN KEY (gr_work_rid) REFERENCES gr_raw_work (gr_work_rid);
  13. ANALYZE gr_work_ids;
  14. -- Extract book identifiers from GoodReads book records
  15. CREATE TABLE gr_book_ids
  16. AS SELECT gr_book_rid,
  17. NULLIF(work_id, '')::int AS gr_work_id,
  18. book_id AS gr_book_id,
  19. NULLIF(trim(both from asin), '') AS gr_asin,
  20. NULLIF(trim(both from isbn), '') AS gr_isbn,
  21. NULLIF(trim(both from isbn13), '') AS gr_isbn13
  22. FROM gr_raw_book,
  23. jsonb_to_record(gr_book_data) AS x(work_id VARCHAR, book_id INTEGER, asin VARCHAR, isbn VARCHAR, isbn13 VARCHAR);
  24. ALTER TABLE gr_book_ids ADD CONSTRAINT gr_book_id_pk PRIMARY KEY (gr_book_rid);
  25. CREATE UNIQUE INDEX gr_book_id_idx ON gr_book_ids (gr_book_id);
  26. CREATE INDEX gr_book_work_idx ON gr_book_ids (gr_work_id);
  27. CREATE INDEX gr_book_asin_idx ON gr_book_ids (gr_asin);
  28. CREATE INDEX gr_book_isbn_idx ON gr_book_ids (gr_isbn);
  29. CREATE INDEX gr_book_isbn13_idx ON gr_book_ids (gr_isbn13);
  30. ALTER TABLE gr_book_ids ADD CONSTRAINT gr_book_id_fk FOREIGN KEY (gr_book_rid) REFERENCES gr_raw_book (gr_book_rid);
  31. ALTER TABLE gr_book_ids ADD CONSTRAINT gr_book_id_work_fk FOREIGN KEY (gr_work_id) REFERENCES gr_work_ids (gr_work_id);
  32. ANALYZE gr_book_ids;
  33. -- Update ISBN ID records with the ISBNs seen in GoodReads
  34. INSERT INTO isbn_id (isbn)
  35. SELECT DISTINCT gr_isbn FROM gr_book_ids
  36. WHERE gr_isbn IS NOT NULL AND gr_isbn NOT IN (SELECT isbn FROM isbn_id);
  37. INSERT INTO isbn_id (isbn)
  38. SELECT gr_isbn13 FROM gr_book_ids
  39. WHERE gr_isbn13 IS NOT NULL AND gr_isbn13 NOT IN (SELECT isbn FROM isbn_id);
  40. -- Map ISBNs to book IDs
  41. CREATE TABLE gr_book_isbn
  42. AS SELECT gr_book_id, isbn_id, COALESCE(bc_of_gr_work(gr_work_id), bc_of_gr_book(gr_book_id)) AS book_code
  43. FROM gr_book_ids, isbn_id
  44. WHERE isbn = gr_isbn OR isbn = gr_isbn13;
  45. CREATE INDEX gr_book_isbn_book_idx ON gr_book_isbn (gr_book_id);
  46. CREATE INDEX gr_book_isbn_isbn_idx ON gr_book_isbn (isbn_id);
  47. CREATE INDEX gr_book_isbn_code_idx ON gr_book_isbn (book_code);
  48. ALTER TABLE gr_book_isbn ADD CONSTRAINT gr_book_isbn_book_fk FOREIGN KEY (gr_book_id) REFERENCES gr_book_ids (gr_book_id);
  49. ALTER TABLE gr_book_isbn ADD CONSTRAINT gr_book_isbn_isbn_fk FOREIGN KEY (isbn_id) REFERENCES isbn_id (isbn_id);
  50. ANALYZE gr_book_isbn;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...