Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

cluster-stats.sql 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  1. --- #dep cluster
  2. --- #table gr.cluster_stats
  3. --- #table locmds.cluster_stats
  4. --- #table ol.cluster_stats
  5. --- #table cluster_stats
  6. --- #step Count GoodReads cluster statistics
  7. DROP MATERIALIZED VIEW IF EXISTS gr.cluster_stats CASCADE;
  8. CREATE MATERIALIZED VIEW gr.cluster_stats AS
  9. SELECT cluster,
  10. COUNT(DISTINCT gr_book_id) AS gr_books,
  11. COUNT(DISTINCT gr_work_id) AS gr_works
  12. FROM gr.book_cluster
  13. JOIN gr.book_ids USING (gr_book_id)
  14. GROUP BY cluster;
  15. CREATE UNIQUE INDEX gr_cluster_stat_cluster_idx ON gr.cluster_stats (cluster);
  16. ANALYZE gr.cluster_stats;
  17. --- #step Count LOC-MDS cluster statistics
  18. DROP MATERIALIZED VIEW IF EXISTS locmds.cluster_stats CASCADE;
  19. CREATE MATERIALIZED VIEW locmds.cluster_stats AS
  20. SELECT cluster, COUNT(DISTINCT rec_id) AS loc_recs
  21. FROM isbn_cluster
  22. JOIN locmds.book_rec_isbn USING (isbn_id)
  23. GROUP BY cluster;
  24. CREATE UNIQUE INDEX loc_cluster_stat_cluster_idx ON locmds.cluster_stats(cluster);
  25. ANALYZE locmds.cluster_stats;
  26. --- #step Count OpenLib cluster statistics
  27. DROP MATERIALIZED VIEW IF EXISTS ol.cluster_stats CASCADE;
  28. CREATE MATERIALIZED VIEW ol.cluster_stats AS
  29. SELECT cluster,
  30. COUNT(DISTINCT edition_id) AS ol_editions,
  31. COUNT(DISTINCT work_id) AS ol_works
  32. FROM isbn_cluster
  33. JOIN ol.isbn_link USING (isbn_id)
  34. GROUP BY cluster;
  35. CREATE UNIQUE INDEX ol_cluster_stat_cluster_idx ON ol.cluster_stats(cluster);
  36. ANALYZE ol.cluster_stats;
  37. --- #step Create joing statistics table
  38. DROP MATERIALIZED VIEW IF EXISTS cluster_stats CASCADE;
  39. CREATE MATERIALIZED VIEW cluster_stats AS
  40. WITH isbn_stats AS (SELECT cluster, COUNT(isbn_id) AS isbns
  41. FROM isbn_cluster
  42. GROUP BY cluster)
  43. SELECT cluster, isbns, loc_recs, ol_editions, ol_works, gr_books, gr_works
  44. FROM isbn_stats
  45. LEFT JOIN locmds.cluster_stats USING (cluster)
  46. LEFT JOIN gr.cluster_stats USING (cluster)
  47. LEFT JOIN ol.cluster_stats USING (cluster);
  48. CREATE UNIQUE INDEX cluster_stat_cluster_idx ON cluster_stats (cluster);
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...