Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

loc-index.sql 3.8 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  1. -- Index MARC fields
  2. CREATE INDEX loc_marc_field_rec_idx ON loc_marc_field (rec_id);;
  3. -- Pull out control numbers
  4. CREATE MATERIALIZED VIEW loc_marc_cn
  5. AS SELECT rec_id, trim(contents) AS control
  6. FROM loc_marc_field
  7. WHERE tag = '001';
  8. CREATE INDEX loc_marc_cn_rec_idx ON loc_marc_cn (rec_id);
  9. ANALYZE loc_marc_cn;
  10. CREATE MATERIALIZED VIEW loc_lccn
  11. AS SELECT DISTINCT rec_id, trim(contents) AS lccn
  12. FROM loc_marc_field
  13. WHERE tag = '010' AND sf_code = 'a';
  14. CREATE INDEX loc_lccn_rec_idx ON loc_lccn (rec_id);
  15. ANALYZE loc_lccn;
  16. CREATE VIEW loc_leader
  17. AS SELECT rec_id, contents AS leader
  18. FROM loc_marc_field
  19. WHERE tag = 'LDR';
  20. CREATE VIEW loc_007_cat
  21. AS SELECT rec_id, LEFT(contents, 1) AS cat_type
  22. FROM loc_marc_field
  23. WHERE tag = '007';
  24. CREATE VIEW loc_006_form
  25. AS SELECT rec_id, LEFT(contents, 1) AS form
  26. FROM loc_marc_field
  27. WHERE tag = '006';
  28. CREATE VIEW loc_record_codes
  29. AS SELECT rec_id,
  30. SUBSTR(contents, 6, 1) AS status,
  31. SUBSTR(contents, 7, 1) AS rec_type,
  32. substr(CONTENTS, 8, 1) AS bib_level
  33. FROM loc_marc_field WHERE tag = 'LDR';
  34. CREATE VIEW loc_gov_type
  35. AS SELECT rec_id, SUBSTR(pd.contents, 29, 1) AS gd_type
  36. FROM loc_record_codes
  37. LEFT JOIN (SELECT rec_id, contents FROM loc_marc_field WHERE tag = '008') pd USING (rec_id)
  38. WHERE rec_type IN ('a', 't');
  39. CREATE MATERIALIZED VIEW loc_record_info
  40. AS SELECT rec_id, control AS marc_cn, lccn, status, rec_type, bib_level
  41. FROM loc_marc_cn
  42. LEFT JOIN loc_lccn USING (rec_id)
  43. JOIN loc_record_codes lrc USING (rec_id);
  44. CREATE INDEX loc_record_rec_idx ON loc_record_info (rec_id);
  45. CREATE INDEX loc_record_control_idx ON loc_record_info (marc_cn);
  46. CREATE INDEX loc_record_lccn_idx ON loc_record_info (lccn);
  47. ANALYZE loc_record_info;
  48. -- A book is any text (MARC type a or t) that is not coded as a government document
  49. CREATE MATERIALIZED VIEW loc_book
  50. AS SELECT rec_id, marc_cn, lccn, status, rec_type, bib_level
  51. FROM loc_record_info
  52. LEFT JOIN (SELECT rec_id, contents FROM loc_marc_field WHERE tag = '008') pd USING (rec_id)
  53. WHERE rec_type IN ('a', 't')
  54. AND (pd.contents IS NULL OR SUBSTRING(pd.contents, 29, 1) IN ('|', ' '));
  55. CREATE INDEX loc_book_rec_idx ON loc_book (rec_id);
  56. CREATE INDEX loc_book_control_idx ON loc_book (marc_cn);
  57. CREATE INDEX loc_book_lccn_idx ON loc_book (lccn);
  58. ANALYZE loc_book;
  59. -- Index ISBNs
  60. DROP MATERIALIZED VIEW IF EXISTS loc_rec_extracted_isbn;
  61. CREATE MATERIALIZED VIEW loc_rec_extracted_isbn AS
  62. SELECT rec_id, upper(regexp_replace(substring(contents from '^\s*(?:(?:ISBN)?[:;z]?\s*)?([0-9 -]+[0-9Xx])'), '[- ]', '')) AS isbn
  63. FROM loc_marc_field
  64. WHERE tag = '020' AND sf_code = 'a';
  65. INSERT INTO isbn_id (isbn)
  66. WITH isbns AS (SELECT DISTINCT isbn FROM loc_rec_extracted_isbn WHERE isbn IS NOT NULL AND char_length(isbn) IN (10,13))
  67. SELECT isbn FROM isbns
  68. WHERE isbn NOT IN (SELECT isbn FROM isbn_id);
  69. ANALYZE isbn_id;
  70. CREATE MATERIALIZED VIEW loc_rec_isbn
  71. AS SELECT rec_id, isbn_id
  72. FROM loc_book JOIN loc_rec_extracted_isbn USING (rec_id) JOIN isbn_id USING (isbn)
  73. WHERE isbn IS NOT NULl AND char_length(isbn) IN (10,13);
  74. CREATE INDEX loc_rec_isbn_rec_idx ON loc_rec_isbn (rec_id);
  75. CREATE INDEX loc_rec_isbn_isbn_idx ON loc_rec_isbn (isbn_id);
  76. ANALYZE loc_rec_isbn;
  77. -- Extract authors
  78. CREATE MATERIALIZED VIEW loc_author_name
  79. AS SELECT rec_id, regexp_replace(contents, '\W+$', '') AS name
  80. FROM loc_marc_field
  81. WHERE tag = '100' AND sf_code = 'a';
  82. CREATE INDEX loc_author_name_rec_idx ON loc_author_name (rec_id);
  83. CREATE INDEX loc_author_name_name_idx ON loc_author_name (name);
  84. -- Extract publication years
  85. CREATE MATERIALIZED VIEW loc_pub_year
  86. AS SELECT rec_id, substring(contents from '(\d\d\d\d)') AS pub_year
  87. FROM loc_marc_field
  88. WHERE tag = '260' AND sf_code = 'c' AND substring(contents from '(\d\d\d\d)') IS NOT NULL;
  89. CREATE INDEX loc_pub_year_rec_idx ON loc_pub_year (rec_id);
  90. ANALYZE loc_pub_year;
Tip!

Press p or to see the previous file or, n or to see the next file

Comments

Loading...