Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

author-info.sql 2.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
  1. --- Schema for consolidating and calibrating author gender info
  2. DROP MATERIALIZED VIEW IF EXISTS rated_authors;
  3. CREATE MATERIALIZED VIEW rated_authors AS
  4. SELECT author_id, COUNT(distinct book_id) AS num_books
  5. FROM (SELECT book_id FROM az_ratings JOIN isbn_book_id ib ON (asin = isbn)
  6. UNION DISTINCT SELECT book_id FROM bx_ratings JOIN isbn_book_id ib USING (isbn)) bids
  7. JOIN ol_book_first_author USING (book_id)
  8. WHERE author_id IS NOT NULL
  9. GROUP BY author_id;
  10. CREATE INDEX rated_authors_auth_idx ON rated_authors (author_id);
  11. ANALYZE rated_authors;
  12. CREATE OR REPLACE FUNCTION merge_gender(cgender VARCHAR, ngender VARCHAR) RETURNS VARCHAR AS $$
  13. BEGIN
  14. RETURN CASE
  15. WHEN ngender = 'unknown' OR ngender IS NULL THEN cgender
  16. WHEN cgender = 'unknown' THEN ngender
  17. WHEN cgender = ngender THEN ngender
  18. ELSE 'ambiguous'
  19. END;
  20. END;
  21. $$ LANGUAGE plpgsql;
  22. CREATE AGGREGATE resolve_gender(gender VARCHAR) (
  23. SFUNC = merge_gender,
  24. STYPE = VARCHAR,
  25. INITCOND = 'unknown'
  26. );
  27. DROP MATERIALIZED VIEW IF EXISTS author_resolution_summary;
  28. CREATE MATERIALIZED VIEW author_resolution_summary AS
  29. WITH res_stats AS (SELECT author_id, author_name,
  30. COUNT(distinct viaf_au_id) AS au_count,
  31. COUNT(distinct NULLIF(viaf_au_gender, 'unknown')) AS gender_count
  32. FROM rated_authors
  33. JOIN ol_author USING (author_id)
  34. LEFT OUTER JOIN viaf_author_name ON (viaf_au_name = author_name)
  35. LEFT OUTER JOIN viaf_author_gender USING (viaf_au_id)
  36. GROUP BY author_id, author_name)
  37. SELECT author_id, author_name, au_count, gender_count,
  38. CASE WHEN au_count = 0 THEN 'no-author'
  39. WHEN gender_count = 0 THEN 'no-gender'
  40. WHEN gender_count = 1 THEN 'known'
  41. WHEN gender_count = 2 THEN 'ambiguous'
  42. ELSE NULL
  43. END AS status
  44. FROM res_stats;
  45. CREATE INDEX au_res_author_idx ON author_resolution_summary (author_id);
  46. ANALYZE author_resolution_summary;
  47. DROP MATERIALIZED VIEW IF EXISTS author_resolution;
  48. CREATE MATERIALIZED VIEW author_resolution AS
  49. SELECT author_id, author_name, resolve_gender(viaf_au_gender) AS author_gender
  50. FROM rated_authors
  51. JOIN ol_author USING (author_id)
  52. LEFT OUTER JOIN viaf_author_name ON (viaf_au_name = author_name)
  53. LEFT OUTER JOIN viaf_author_gender USING (viaf_au_id)
  54. GROUP BY author_id, author_name;
  55. CREATE INDEX au_res_au_idx ON author_resolution (author_id);
  56. ANALYZE author_resolution;
  57. DROP VIEW IF EXISTS rated_book_author;
  58. CREATE VIEW rated_book_author
  59. AS SELECT book_id, author_id, author_name, author_gender
  60. FROM ol_book_first_author JOIN author_resolution USING (author_id);
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...