Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

loc.py 3.0 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  1. import logging
  2. from invoke import task
  3. import support as s
  4. _log = logging.getLogger(__name__)
  5. @task(s.init)
  6. def init(c, force=False):
  7. if s.start('loc-mds-init', force=force, fail=False):
  8. _log.info('initializing LOC schema')
  9. s.psql(c, 'loc-mds-schema.sql')
  10. s.finish('loc-mds-init')
  11. else:
  12. _log.info('LOC schema initialized')
  13. @task(s.init)
  14. def init_id(c, force=False):
  15. if s.start('loc-id-init', force=force, fail=False):
  16. _log.info('initializing LOC schema')
  17. s.psql(c, 'loc-id-schema.sql')
  18. s.finish('loc-id-init')
  19. else:
  20. _log.info('LOC schema initialized')
  21. @task(s.build, s.init, init)
  22. def import_books(c, force=False):
  23. "Import the LOC MDS data"
  24. s.start('loc-mds-books', force=force)
  25. loc = s.data_dir / 'LOC'
  26. files = list(loc.glob('BooksAll.2014.part*.xml.gz'))
  27. _log.info('importing LOC data from', len(files), 'files')
  28. s.pipeline([
  29. [s.bin_dir / 'parse-marc'] + files,
  30. ['psql', '-c', '\\copy locmds.book_marc_field FROM STDIN']
  31. ])
  32. s.finish('loc-mds-books')
  33. @task(s.build, s.init, init)
  34. def import_names(c, force=False):
  35. "Import the LOC MDS name data"
  36. s.start('loc-mds-names', force=force)
  37. loc = s.data_dir / 'LOC'
  38. names = loc / 'Names.2014.combined.xml.gz'
  39. _log.info('importing LOC data from %s', loc)
  40. s.pipeline([
  41. [s.bin_dir / 'parse-marc', names],
  42. ['psql', '-c', '\\copy locmds.name_marc_field FROM STDIN']
  43. ])
  44. s.finish('loc-mds-names')
  45. @task(s.init)
  46. def index_books(c, force=False):
  47. "Index LOC MDS books data"
  48. s.check_prereq('loc-mds-books')
  49. s.start('loc-mds-book-index', force=force)
  50. _log.info('building LOC indexes')
  51. s.psql(c, 'loc-mds-index-books.sql')
  52. s.finish('loc-mds-book-index')
  53. @task(s.init)
  54. def index_names(c, force=False):
  55. "Index LOC MDS name data"
  56. s.check_prereq('loc-mds-names')
  57. s.start('loc-mds-name-index', force=force)
  58. _log.info('building LOC name indexes')
  59. s.psql(c, 'loc-mds-index-names.sql')
  60. s.finish('loc-mds-name-index')
  61. @task(s.build, s.init, init_id)
  62. def import_id_auth(c, force=False, convert_only=False, convert=True):
  63. s.start('loc-id-names', force=force)
  64. loc = s.data_dir / 'LOC'
  65. auth = loc / 'authoritiesnames.nt.both.zip'
  66. auth_dir = loc / 'authorities'
  67. if convert:
  68. _log.info('converting authority ntriples to PSQL')
  69. s.pipeline([
  70. [s.bin_dir / 'import-ntriples', '--db-schema', 'locid', '--table', 'auth_triple', auth]
  71. ])
  72. s.finish('loc-id-names')
  73. @task(s.build, s.init, init_id)
  74. def import_id_work(c, force=False, convert_only=False, convert=True):
  75. s.start('loc-id-works', force=force)
  76. loc = s.data_dir / 'LOC'
  77. auth = loc / 'bibframeworks.nt.zip'
  78. auth_dir = loc / 'works'
  79. if convert:
  80. _log.info('converting BIBFRAME ntriples to PSQL')
  81. s.pipeline([
  82. [s.bin_dir / 'import-ntriples', '--db-schema', 'locid', '--table', 'work_triple', auth]
  83. ])
  84. s.finish('loc-id-works')
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...