Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

author-info.sql 4.5 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  1. --- Schema for consolidating and calibrating author gender info
  2. CREATE OR REPLACE FUNCTION merge_gender(cgender VARCHAR, ngender VARCHAR) RETURNS VARCHAR
  3. IMMUTABLE STRICT PARALLEL SAFE
  4. AS $$ SELECT CASE
  5. WHEN ngender = 'unknown' OR ngender IS NULL THEN cgender
  6. WHEN cgender = 'unknown' THEN ngender
  7. WHEN cgender = ngender THEN ngender
  8. ELSE 'ambiguous'
  9. END
  10. $$ LANGUAGE SQL;
  11. CREATE AGGREGATE resolve_gender(gender VARCHAR) (
  12. SFUNC = merge_gender,
  13. STYPE = VARCHAR,
  14. INITCOND = 'unknown'
  15. );
  16. DROP TABLE IF EXISTS loc_cluster_author_gender;
  17. CREATE TABLE loc_cluster_author_gender
  18. AS SELECT cluster,
  19. case when count(an.name) = 0 then 'no-loc-author'
  20. when count(vn.rec_id) = 0 then 'no-viaf-author'
  21. when count(vg.gender) = 0 then 'no-gender'
  22. else resolve_gender(vg.gender)
  23. end AS gender
  24. FROM loc_isbn_cluster
  25. JOIN loc_rec_isbn USING (isbn_id)
  26. LEFT JOIN loc_author_name an USING (rec_id)
  27. LEFT JOIN viaf_author_name vn USING (name)
  28. LEFT JOIN viaf_author_gender vg ON (vn.rec_id = vg.rec_id)
  29. GROUP BY cluster;
  30. CREATE UNIQUE INDEX loc_cluster_author_gender_book_idx ON loc_cluster_author_gender (cluster);
  31. DROP TABLE IF EXISTS cluster_loc_author_gender;
  32. CREATE TABLE cluster_loc_author_gender
  33. AS SELECT cluster,
  34. case
  35. when count(an.name) = 0 then 'no-loc-author'
  36. when count(vn.rec_id) = 0 then 'no-viaf-author'
  37. when count(vg.gender) = 0 then 'no-gender'
  38. else resolve_gender(vg.gender)
  39. end AS gender
  40. FROM isbn_cluster
  41. JOIN loc_rec_isbn ri USING (isbn_id)
  42. LEFT JOIN loc_author_name an USING (rec_id)
  43. LEFT JOIN viaf_author_name vn USING (name)
  44. LEFT JOIN viaf_author_gender vg ON (vn.rec_id = vg.rec_id)
  45. GROUP BY cluster;
  46. CREATE UNIQUE INDEX cluster_loc_author_gender_book_idx ON cluster_loc_author_gender (cluster);
  47. DROP MATERIALIZED VIEW IF EXISTS rated_book CASCADE;
  48. CREATE MATERIALIZED VIEW rated_book AS
  49. SELECT DISTINCT cluster, isbn_id
  50. FROM (SELECT book_id AS cluster FROM bx_all_ratings
  51. UNION DISTINCT
  52. SELECT book_id AS cluster FROM az_export_ratings) rated
  53. LEFT JOIN isbn_cluster USING (cluster);
  54. CREATE INDEX rated_book_cluster_idx ON rated_book (cluster);
  55. CREATE INDEX rated_book_isbn_idx ON rated_book (isbn_id);
  56. ANALYZE rated_book;
  57. DROP MATERIALIZED VIEW IF EXISTS cluster_ol_author_name;
  58. CREATE MATERIALIZED VIEW cluster_ol_author_name AS
  59. SELECT DISTINCT cluster, author_name
  60. FROM isbn_cluster
  61. JOIN ol_isbn_link USING (isbn_id)
  62. JOIN ol_edition USING (edition_id)
  63. JOIN ol_edition_author USING (edition_id)
  64. JOIN ol_author_name USING (author_id);
  65. DROP MATERIALIZED VIEW IF EXISTS cluster_ol_first_author_name;
  66. CREATE MATERIALIZED VIEW cluster_ol_first_author_name AS
  67. SELECT DISTINCT cluster, author_name
  68. FROM isbn_cluster
  69. JOIN ol_isbn_link USING (isbn_id)
  70. JOIN ol_edition USING (edition_id)
  71. JOIN ol_edition_first_author USING (edition_id)
  72. JOIN ol_author_name USING (author_id);
  73. DROP MATERIALIZED VIEW IF EXISTS cluster_loc_first_author_name;
  74. CREATE MATERIALIZED VIEW cluster_loc_author_name AS
  75. SELECT DISTINCT cluster, name AS author_name
  76. FROM isbn_cluster
  77. JOIN loc_rec_isbn USING (isbn_id)
  78. JOIN loc_author_name USING (rec_id);
  79. DROP MATERIALIZED VIEW IF EXISTS cluster_first_author_name;
  80. CREATE MATERIALIZED VIEW cluster_first_author_name AS
  81. SELECT cluster, author_name FROM cluster_loc_first_author_name
  82. UNION DISTINCT
  83. SELECT cluster, author_name FROM cluster_ol_first_author_name;
  84. CREATE INDEX cluster_first_author_name_cluster_idx ON cluster_first_author_name (cluster);
  85. CREATE INDEX cluster_first_author_name_idx ON cluster_first_author_name (author_name);
  86. ANALYZE cluster_first_author_name;
  87. DROP TABLE IF EXISTS cluster_first_author_gender;
  88. CREATE TABLE cluster_first_author_gender
  89. AS SELECT cluster,
  90. case
  91. when count(an.author_name) = 0 then 'no-loc-author'
  92. when count(vn.rec_id) = 0 then 'no-viaf-author'
  93. when count(vg.gender) = 0 then 'no-gender'
  94. else resolve_gender(vg.gender)
  95. end AS gender
  96. FROM (SELECT DISTINCT cluster FROM isbn_cluster WHERE cluster < bc_of_isbn(0)) cl
  97. LEFT JOIN cluster_first_author_name an USING (cluster)
  98. LEFT JOIN viaf_author_name vn ON (name = author_name)
  99. LEFT JOIN viaf_author_gender vg ON (vn.rec_id = vg.rec_id)
  100. GROUP BY cluster;
  101. CREATE UNIQUE INDEX cluster_first_author_gender_book_idx ON cluster_first_author_gender (cluster);
  102. ANALYZE cluster_first_author_gender;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...