Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

loc.py 1.6 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
  1. import logging
  2. from invoke import task
  3. import support as s
  4. _log = logging.getLogger(__name__)
  @task(s.build, s.init, name='import')
  def import_loc(c, force=False):
      """Import the LOC MDS data

      Runs as the ``loc`` stage (bracketed by ``s.start`` / ``s.finish``):
      applies ``loc-schema.sql`` through ``s.psql``, then hands
      ``s.pipeline`` a ``parse-marc`` command over every
      ``BooksAll.2014.part*.xml.gz`` file under ``<data_dir>/LOC`` followed
      by a ``psql \\copy`` command targeting ``loc_marc_field``.

      Args:
          c: the Invoke context, passed on to ``s.psql``.
          force: forwarded to ``s.start``.
      """
      s.start('loc', force=force)
      _log.info('initializing LOC schema')
      s.psql(c, 'loc-schema.sql')
      loc = s.data_dir / 'LOC'
      files = list(loc.glob('BooksAll.2014.part*.xml.gz'))
      # logging applies %-formatting to the message lazily; extra positional
      # arguments without matching placeholders make the handler report a
      # formatting error instead of emitting the message.
      _log.info('importing LOC data from %d files', len(files))
      s.pipeline([
          [s.bin_dir / 'parse-marc'] + files,
          ['psql', '-c', '\\copy loc_marc_field FROM STDIN'],
      ])
      s.finish('loc')
  @task(s.build, s.init, name='import-names')
  def import_names(c, force=False):
      """Import the LOC MDS name data

      Runs as the ``loc-mds-names`` stage (bracketed by ``s.start`` /
      ``s.finish``): applies ``loc-name-schema.sql`` through ``s.psql``, then
      hands ``s.pipeline`` a ``parse-marc`` command over
      ``<data_dir>/LOC/Names.2014.combined.xml.gz`` followed by a
      ``psql \\copy`` command targeting ``locmds_name_marc_field``.

      The function has its own Python name so that it does not rebind (and
      thereby hide) the ``import_loc`` task defined for the ``import`` task;
      the task name exposed to Invoke is still ``import-names``.

      Args:
          c: the Invoke context, passed on to ``s.psql``.
          force: forwarded to ``s.start``.
      """
      s.start('loc-mds-names', force=force)
      _log.info('initializing LOC schema')
      s.psql(c, 'loc-name-schema.sql')
      loc = s.data_dir / 'LOC'
      names = loc / 'Names.2014.combined.xml.gz'
      _log.info('importing LOC data from %s', loc)
      s.pipeline([
          [s.bin_dir / 'parse-marc', names],
          ['psql', '-c', '\\copy locmds_name_marc_field FROM STDIN'],
      ])
      s.finish('loc-mds-names')
  @task(s.init)
  def index(c, force=False):
      """Index LOC MDS data

      Checks that the ``loc`` stage is available via ``s.check_prereq``, then
      runs ``loc-index.sql`` through ``s.psql`` as the ``loc-index`` stage.

      Args:
          c: the Invoke context, passed on to ``s.psql``.
          force: forwarded to ``s.start``.
      """
      stage = 'loc-index'

      s.check_prereq('loc')
      s.start(stage, force=force)

      _log.info('building LOC indexes')
      s.psql(c, 'loc-index.sql')

      s.finish(stage)
  @task(s.init)
  def index_names(c, force=False):
      """Index LOC MDS name data

      Runs ``loc-name-index.sql`` through ``s.psql`` as the
      ``loc-mds-names-index`` stage.

      Args:
          c: the Invoke context, passed on to ``s.psql``.
          force: forwarded to ``s.start``.
      """
      # The name data is loaded under the 'loc-mds-names' stage, so that is
      # the stage this task requires (it previously checked 'loc').
      # NOTE(review): confirm loc-name-index.sql does not also depend on the
      # tables loaded by the 'loc' stage.
      s.check_prereq('loc-mds-names')
      s.start('loc-mds-names-index', force=force)
      _log.info('building LOC indexes')
      s.psql(c, 'loc-name-index.sql')
      s.finish('loc-mds-names-index')
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...